Merge branch 'master' into ldap_fix_search_limit
.github/workflows/master.yml
@@ -2994,6 +2994,77 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
  SQLancerTestRelease:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, fuzzer-unit-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/sqlancer_release
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=SQLancer (release)
          REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: SQLancer
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 sqlancer_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  SQLancerTestDebug:
    needs: [BuilderDebDebug]
    runs-on: [self-hosted, fuzzer-unit-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/sqlancer_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=SQLancer (debug)
          REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: SQLancer
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 sqlancer_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FinishCheck:
    needs:
      - DockerHubPush
@@ -3053,6 +3124,8 @@ jobs:
      - UnitTestsUBsan
      - UnitTestsReleaseClang
      - SharedBuildSmokeTest
      - SQLancerTestRelease
      - SQLancerTestDebug
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Clear repository
.github/workflows/pull_request.yml
@@ -3491,6 +3491,77 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
  SQLancerTestRelease:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, fuzzer-unit-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/sqlancer_release
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=SQLancer (release)
          REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: SQLancer
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 sqlancer_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  SQLancerTestDebug:
    needs: [BuilderDebDebug]
    runs-on: [self-hosted, fuzzer-unit-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/sqlancer_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=SQLancer (debug)
          REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: SQLancer
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 sqlancer_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
@@ -3501,7 +3572,6 @@ jobs:
    if: contains(github.event.pull_request.labels.*.name, 'jepsen-test')
    needs: [BuilderBinRelease]
    uses: ./.github/workflows/jepsen.yml

  FinishCheck:
    needs:
      - StyleCheck
@@ -3576,6 +3646,8 @@ jobs:
      - SharedBuildSmokeTest
      - CompatibilityCheck
      - IntegrationTestsFlakyCheck
      - SQLancerTestRelease
      - SQLancerTestDebug
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Clear repository
.gitignore
@@ -154,3 +154,6 @@ website/package-lock.json
/programs/server/metadata
/programs/server/store

# temporary test files
tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
@@ -3,10 +3,20 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)

if (ENABLE_CLANG_TIDY)

    find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
    find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache")
    if (CLANG_TIDY_CACHE_PATH)
        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")

        # Why do we use ';' here?
        # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY
        # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax.
        set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper")
    else ()
        find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
    endif ()

    if (CLANG_TIDY_PATH)
        message(STATUS
        message (STATUS
            "Using clang-tidy: ${CLANG_TIDY_PATH}.
            The checks will be run during build process.
            See the .clang-tidy file at the root directory to configure the checks.")
@@ -15,11 +25,15 @@ if (ENABLE_CLANG_TIDY)

        # clang-tidy requires assertions to guide the analysis
        # Note that NDEBUG is set implicitly by CMake for non-debug builds
        set(COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG")
        set (COMPILER_FLAGS "${COMPILER_FLAGS} -UNDEBUG")

        # The variable CMAKE_CXX_CLANG_TIDY will be set inside src and base directories with non third-party code.
        # The variable CMAKE_CXX_CLANG_TIDY will be set inside the following directories with non third-party code.
        # - base
        # - programs
        # - src
        # - utils
        # set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
    else ()
        message(${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found")
        message (${RECONFIGURE_MESSAGE_LEVEL} "clang-tidy is not found")
    endif ()
endif ()
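Editor's note on usage (not part of the diff): the `ENABLE_CLANG_TIDY` option above is what turns the analyzer on at configure time. A minimal sketch of an invocation, assuming a standard out-of-source build directory; the exact flags used by ClickHouse CI may differ:

```bash
# Configure a build with clang-tidy enabled; if clang-tidy-cache is found in PATH,
# the logic above wraps clang-tidy with it automatically.
cmake -DENABLE_CLANG_TIDY=1 -DCMAKE_BUILD_TYPE=Debug ..
```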
@@ -91,6 +91,9 @@ ENV PATH="$PATH:/usr/local/go/bin"
ENV GOPATH=/workdir/go
ENV GOCACHE=/workdir/

RUN curl https://raw.githubusercontent.com/matus-chochlik/ctcache/7fd516e91c17779cbc6fc18bd119313d9532dd90/clang-tidy-cache -Lo /usr/bin/clang-tidy-cache \
    && chmod +x /usr/bin/clang-tidy-cache

RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
@@ -258,6 +258,10 @@ def parse_env_variables(
    if clang_tidy:
        # 15G is not enough for tidy build
        cache_maxsize = "25G"

        # `CTCACHE_DIR` has the same purpose as the `CCACHE_DIR` above.
        # It's there to have the clang-tidy cache embedded into our standard `CCACHE_DIR`
        result.append("CTCACHE_DIR=/ccache/clang-tidy-cache")
        result.append(f"CCACHE_MAXSIZE={cache_maxsize}")

    if distcc_hosts:
@@ -282,9 +286,7 @@ def parse_env_variables(
        cmake_flags.append("-DENABLE_TESTS=1")

    if shared_libraries:
        cmake_flags.append(
            "-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1"
        )
        cmake_flags.append("-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1")
        # We can't always build utils because it requires too much space, but
        # we have to build them at least in some way in CI. The shared library
        # build is probably the least heavy disk-wise.
@@ -1,5 +1,5 @@
# docker build -t clickhouse/sqlancer-test .
FROM ubuntu:20.04
FROM ubuntu:22.04

# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
@@ -11,13 +11,15 @@ def process_result(result_folder):
    summary = []
    paths = []
    tests = [
        "TLPWhere",
        "TLPAggregate",
        "TLPDistinct",
        "TLPGroupBy",
        "TLPHaving",
        "TLPWhere",
        "TLPWhereGroupBy",
        "TLPDistinct",
        "TLPAggregate",
        "NoREC",
    ]
    failed_tests = []

    for test in tests:
        err_path = "{}/{}.err".format(result_folder, test)
@@ -33,15 +35,11 @@ def process_result(result_folder):
            with open(err_path, "r") as f:
                if "AssertionError" in f.read():
                    summary.append((test, "FAIL"))
                    failed_tests.append(test)
                    status = "failure"
                else:
                    summary.append((test, "OK"))

    logs_path = "{}/logs.tar.gz".format(result_folder)
    if not os.path.exists(logs_path):
        logging.info("No logs tar on path %s", logs_path)
    else:
        paths.append(logs_path)
    stdout_path = "{}/stdout.log".format(result_folder)
    if not os.path.exists(stdout_path):
        logging.info("No stdout log on path %s", stdout_path)
@@ -53,18 +51,23 @@ def process_result(result_folder):
    else:
        paths.append(stderr_path)

    description = "SQLancer test run. See report"
    description = "SQLancer run successfully"
    if status == "failure":
        description = f"Failed oracles: {failed_tests}"

    return status, description, summary, paths


def write_results(results_file, status_file, results, status):
def write_results(
    results_file, status_file, description_file, results, status, description
):
    with open(results_file, "w") as f:
        out = csv.writer(f, delimiter="\t")
        out.writerows(results)
    with open(status_file, "w") as f:
        out = csv.writer(f, delimiter="\t")
        out.writerow(status)
        f.write(status + "\n")
    with open(description_file, "w") as f:
        f.write(description + "\n")


if __name__ == "__main__":
@@ -72,13 +75,20 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="ClickHouse script for parsing results of sqlancer test"
    )
    parser.add_argument("--in-results-dir", default="/test_output/")
    parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
    parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
    parser.add_argument("--in-results-dir", default="/workspace/")
    parser.add_argument("--out-results-file", default="/workspace/summary.tsv")
    parser.add_argument("--out-description-file", default="/workspace/description.txt")
    parser.add_argument("--out-status-file", default="/workspace/status.txt")
    args = parser.parse_args()

    state, description, test_results, logs = process_result(args.in_results_dir)
    status, description, summary, logs = process_result(args.in_results_dir)
    logging.info("Result parsed")
    status = (state, description)
    write_results(args.out_results_file, args.out_status_file, test_results, status)
    write_results(
        args.out_results_file,
        args.out_status_file,
        args.out_description_file,
        summary,
        status,
        description,
    )
    logging.info("Result written")
@@ -1,33 +1,62 @@
#!/bin/bash
set -exu
trap "exit" INT TERM

set -e -x
function wget_with_retry
{
    for _ in 1 2 3 4; do
        if wget -nv -nd -c "$1";then
            return 0
        else
            sleep 0.5
        fi
    done
    return 1
}

dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
if [ -z ${BINARY_URL_TO_DOWNLOAD+x} ]
then
    echo "No BINARY_URL_TO_DOWNLOAD provided."
else
    wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
    chmod +x /clickhouse
fi

service clickhouse-server start && sleep 5
if [[ -f "/clickhouse" ]]; then
    echo "/clickhouse exists"
else
    exit 1
fi

cd /workspace
/clickhouse server -P /workspace/clickhouse-server.pid -L /workspace/clickhouse-server.log -E /workspace/clickhouse-server.log.err --daemon

for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done

cd /sqlancer/sqlancer-master

export TIMEOUT=300
export NUM_QUERIES=1000
TIMEOUT=300
NUM_QUERIES=1000
NUM_THREADS=10
TESTS=( "TLPGroupBy" "TLPHaving" "TLPWhere" "TLPDistinct" "TLPAggregate" "NoREC" )
echo "${TESTS[@]}"

( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err
for TEST in "${TESTS[@]}"; do
    echo "$TEST"
    if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]
    then
        echo "Server is OK"
        ( java -jar target/sqlancer-*.jar --log-each-select true --print-failed false --num-threads "$NUM_THREADS" --timeout-seconds "$TIMEOUT" --num-queries "$NUM_QUERIES" --username default --password "" clickhouse --oracle "$TEST" | tee "/workspace/$TEST.out" ) 3>&1 1>&2 2>&3 | tee "/workspace/$TEST.err"
    else
        touch "/workspace/$TEST.err" "/workspace/$TEST.out"
        echo "Server is not responding" | tee /workspace/server_crashed.log
    fi
done

service clickhouse stop
ls /workspace
pkill -F /workspace/clickhouse-server.pid || true

ls /var/log/clickhouse-server/
tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ .
tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log
tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log
tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log
for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done

/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
ls /test_output
/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /workspace/check_status.tsv
ls /workspace
@@ -4,25 +4,39 @@ sidebar_label: Cell Towers
sidebar_position: 3
title: "Cell Towers"
---
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md';

This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
## Goal

In this guide you will learn how to:
- Load the OpenCelliD data in ClickHouse
- Connect Apache Superset to ClickHouse
- Build a dashboard based on data available in the dataset

Here is a preview of the dashboard created in this guide:

![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)

## Get the Dataset {#get-the-dataset}

This dataset is from [OpenCelliD](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.

As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).

OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign in.


## Get the Dataset {#get-the-dataset}

<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>

### Load the sample data

ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
<ActionsMenu menu="Load Data" />

@@ -30,13 +44,33 @@ Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data**

![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)

Examine the schema of the cell_towers table:
### Examine the schema of the cell_towers table
```sql
DESCRIBE TABLE cell_towers
```

<SQLConsoleDetail />

This is the output of `DESCRIBE`. Down further in this guide the field type choices will be described.
```response
┌─name──────────┬─type──────────────────────────────────────────────────────────────────┬
│ radio │ Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5) │
│ mcc │ UInt16 │
│ net │ UInt16 │
│ area │ UInt16 │
│ cell │ UInt64 │
│ unit │ Int16 │
│ lon │ Float64 │
│ lat │ Float64 │
│ range │ UInt32 │
│ samples │ UInt32 │
│ changeable │ UInt8 │
│ created │ DateTime │
│ updated │ DateTime │
│ averageSignal │ UInt8 │
└───────────────┴───────────────────────────────────────────────────────────────────────┴
```

</TabItem>
<TabItem value="selfmanaged" label="Self-managed">
@@ -86,7 +120,7 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t
</TabItem>
</Tabs>

## Example queries {#examples}
## Run some example queries {#examples}

1. A number of cell towers by type:

@@ -127,13 +161,13 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.)
```

So, the top countries are: the USA, Germany, and Russia.
Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia.

You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.

## Use case: Incorporate geo data {#use-case}

Using `pointInPolygon` function.
Using the [`pointInPolygon`](/docs/en/sql-reference/functions/geo/coordinates.md/#pointinpolygon) function.

1. Create a table where we will store polygons:

@@ -224,6 +258,110 @@ WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow))
1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.)
```

The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=).
## Review of the schema

Although you cannot create temporary tables there.
Before building visualizations in Superset, have a look at the columns that you will use. This dataset primarily provides the location (Longitude and Latitude) and radio types at mobile cellular towers worldwide. The column descriptions can be found in the [community forum](https://community.opencellid.org/t/documenting-the-columns-in-the-downloadable-cells-database-csv/186). The columns used in the visualizations that will be built are described below.

Here is a description of the columns taken from the OpenCelliD forum:

| Column       | Description                                            |
|--------------|--------------------------------------------------------|
| radio        | Technology generation: CDMA, GSM, UMTS, 5G NR          |
| mcc          | Mobile Country Code: `204` is The Netherlands          |
| lon          | Longitude: With Latitude, approximate tower location   |
| lat          | Latitude: With Longitude, approximate tower location   |

:::tip mcc
To find your MCC check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code), and use the three digits in the **Mobile country code** column.
:::

The schema for this table was designed for compact storage on disk and query speed.
- The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string.
- `mcc` or Mobile country code, is stored as a `UInt16` as we know the range is 1 - 999.
- `lon` and `lat` are `Float64`.

None of the other fields are used in the queries or visualizations in this guide, but they are described in the forum linked above if you are interested.
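Editor's note (not part of the diff): for reference, a DDL sketch that matches the `DESCRIBE` output shown earlier. The `ENGINE`/`ORDER BY` clause here is illustrative; the authoritative `CREATE TABLE` statement is in the Self-managed tab of this guide.

```sql
CREATE TABLE cell_towers
(
    radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
    mcc UInt16,
    net UInt16,
    area UInt16,
    cell UInt64,
    unit Int16,
    lon Float64,
    lat Float64,
    range UInt32,
    samples UInt32,
    changeable UInt8,
    created DateTime,
    updated DateTime,
    averageSignal UInt8
)
ENGINE = MergeTree
ORDER BY (radio, mcc, net, created);  -- sorting key is an assumption for illustration
```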

## Build visualizations with Apache Superset

Superset is easy to run from Docker. If you already have Superset running, all you need to do is add ClickHouse Connect with `pip install clickhouse-connect`. If you need to install Superset, open **Launch Apache Superset in Docker** directly below.

<SupersetDocker />

To build a Superset dashboard using the OpenCelliD dataset you should:
- Add your ClickHouse service as a Superset **database**
- Add the table **cell_towers** as a Superset **dataset**
- Create some **charts**
- Add the charts to a **dashboard**

### Add your ClickHouse service as a Superset database

<ConnectionDetails />

In Superset a database can be added by choosing the database type, and then providing the connection details. Open Superset and look for the **+**, it has a menu with **Data** and then **Connect database** options.

![Add a database](@site/docs/en/getting-started/example-datasets/images/superset-add.png)

Choose **ClickHouse Connect** from the list:

![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)

:::note
If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
:::

#### Add your connection details:

:::tip
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
:::

![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)

### Add the table **cell_towers** as a Superset **dataset**

In Superset a **dataset** maps to a table within a database. Click on add a dataset and choose your ClickHouse service, the database containing your table (`default`), and choose the `cell_towers` table:

![Add cell_towers table as a dataset](@site/docs/en/getting-started/example-datasets/images/superset-add-dataset.png)

### Create some **charts**

When you choose to add a chart in Superset you have to specify the dataset (`cell_towers`) and the chart type. Since the OpenCelliD dataset provides longitude and latitude coordinates for cell towers we will create a **Map** chart. The **deck.gl Scatterplot** type is suited to this dataset as it works well with dense data points on a map.

![Create a map in Superset](@site/docs/en/getting-started/example-datasets/images/superset-create-map.png)

#### Specify the query used for the map

A deck.gl Scatterplot requires a longitude and latitude, and one or more filters can also be applied to the query. In this example two filters are applied, one for cell towers with UMTS radios, and one for the Mobile country code assigned to The Netherlands.

The fields `lon` and `lat` contain the longitude and latitude:

![Specify longitude and latitude fields](@site/docs/en/getting-started/example-datasets/images/superset-lon-lat.png)

Add a filter with `mcc` = `204` (or substitute any other `mcc` value):

![Filter on MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-mcc-204.png)

Add a filter with `radio` = `'UMTS'` (or substitute any other `radio` value, you can see the choices in the output of `DESCRIBE TABLE cell_towers`):

![Filter on radio = UMTS](@site/docs/en/getting-started/example-datasets/images/superset-radio-umts.png)

This is the full configuration for the chart that filters on `radio = 'UMTS'` and `mcc = 204`:

![Chart for UMTS radios in MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png)

Click on **UPDATE CHART** to render the visualization.

### Add the charts to a **dashboard**

This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way and they are added to a dashboard.

![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)

:::tip
The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play).

This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.

Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
:::
(Binary files added: new Superset screenshot images under docs/en/getting-started/example-datasets/images/, including superset-add.png.)
@@ -4,7 +4,7 @@ sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---

RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.

## Download and Unpack the Dataset
@@ -309,7 +309,7 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```

## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
## Migration from ZooKeeper {#migration-from-zookeeper}

Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
@@ -2939,7 +2939,7 @@ Possible values:
- 0 — Projection optimization disabled.
- 1 — Projection optimization enabled.

Default value: `0`.
Default value: `1`.

## force_optimize_projection {#force-optimize-projection}
@@ -28,18 +28,34 @@ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D7
sudo apt-get update
```

### You Get the Unsupported Architecture Warning with Apt-get {#you-get-the-unsupported-architecture-warning-with-apt-get}
### You Get Different Warnings with `apt-get update` {#you-get-different-warnings-with-apt-get-update}

- The completed warning message is as follows:
- The complete warning message is one of the following:

```
N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386'
```

```
E: Failed to fetch https://packages.clickhouse.com/deb/dists/stable/main/binary-amd64/Packages.gz File has unexpected size (30451 != 28154). Mirror sync in progress?
```

```
E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Origin' value from 'Artifactory' to 'ClickHouse'
E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Label' value from 'Artifactory' to 'ClickHouse'
N: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Suite' value from 'stable' to ''
N: This must be accepted explicitly before updates for this repository can be applied. See apt-secure(8) manpage for details.
```

```
Err:11 https://packages.clickhouse.com/deb stable InRelease
  400 Bad Request [IP: 172.66.40.249 443]
```

To resolve the above issue, please use the following script:

```bash
sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch
sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch /var/lib/apt/lists/partial/packages.clickhouse.com_*
sudo apt-get clean
sudo apt-get autoclean
```
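Editor's note (a natural follow-up step, not part of the original instructions): after cleaning the cached package lists, re-run the update so apt fetches the repository metadata again.

```bash
sudo apt-get update
```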
@@ -303,17 +303,25 @@ or
CREATE DICTIONARY somedict (
    id UInt64,
    first Date,
    last Date
    last Date,
    advertiser_id UInt64
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'date_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(RANGE_HASHED())
RANGE(MIN first MAX last)
```

To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected:

``` sql
dictGetT('dict_name', 'attr_name', id, date)
dictGet('dict_name', 'attr_name', id, date)
```
Query example:

``` sql
SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64);
```

This function returns the value for the specified `id`s and the date range that includes the passed date.

@@ -8,70 +8,69 @@ title: "Geo Functions"

## Geographical Coordinates Functions

- [greatCircleDistance](./coordinates.md#greatCircleDistance)
- [geoDistance](./coordinates.md#geoDistance)
- [greatCircleAngle](./coordinates.md#greatCircleAngle)
- [pointInEllipses](./coordinates.md#pointInEllipses)
- [pointInPolygon](./coordinates.md#pointInPolygon)
- [greatCircleDistance](./coordinates.md#greatcircledistance)
- [geoDistance](./coordinates.md#geodistance)
- [greatCircleAngle](./coordinates.md#greatcircleangle)
- [pointInEllipses](./coordinates.md#pointinellipses)
- [pointInPolygon](./coordinates.md#pointinpolygon)

## Geohash Functions
- [geohashEncode](./geohash.md#geohashEncode)
- [geohashDecode](./geohash.md#geohashDecode)
- [geohashesInBox](./geohash.md#geohashesInBox)
- [geohashEncode](./geohash.md#geohashencode)
- [geohashDecode](./geohash.md#geohashdecode)
- [geohashesInBox](./geohash.md#geohashesinbox)

## H3 Indexes Functions

- [h3IsValid](./h3.md#h3IsValid)
- [h3GetResolution](./h3.md#h3GetResolution)
- [h3EdgeAngle](./h3.md#h3EdgeAngle)
- [h3EdgeLengthM](./h3.md#h3EdgeLengthM)
- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm)
- [geoToH3](./h3.md#geoToH3)
- [h3ToGeo](./h3.md#h3ToGeo)
- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary)
- [h3kRing](./h3.md#h3kRing)
- [h3GetBaseCell](./h3.md#h3GetBaseCell)
- [h3HexAreaM2](./h3.md#h3HexAreaM2)
- [h3HexAreaKm2](./h3.md#h3HexAreaKm2)
- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors)
- [h3ToChildren](./h3.md#h3ToChildren)
- [h3ToParent](./h3.md#h3ToParent)
- [h3ToString](./h3.md#h3ToString)
- [stringToH3](./h3.md#stringToH3)
- [h3GetResolution](./h3.md#h3GetResolution)
- [h3IsResClassIII](./h3.md#h3IsResClassIII)
- [h3IsPentagon](./h3.md#h3IsPentagon)
- [h3GetFaces](./h3.md#h3GetFaces)
- [h3CellAreaM2](./h3.md#h3CellAreaM2)
- [h3CellAreaRads2](./h3.md#h3CellAreaRads2)
- [h3ToCenterChild](./h3.md#h3ToCenterChild)
- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM)
- [h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm)
- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads)
- [h3NumHexagons](./h3.md#h3NumHexagons)
- [h3Line](./h3.md#h3Line)
- [h3Distance](./h3.md#h3Distance)
- [h3HexRing](./h3.md#h3HexRing)
- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge)
- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid)
- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge)
- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge)
- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge)
- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon)
- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary)
- [h3IsValid](./h3.md#h3isvalid)
- [h3GetResolution](./h3.md#h3getresolution)
- [h3EdgeAngle](./h3.md#h3edgeangle)
- [h3EdgeLengthM](./h3.md#h3edgelengthm)
- [h3EdgeLengthKm](./h3.md#h3edgelengthkm)
- [geoToH3](./h3.md#geotoh3)
- [h3ToGeo](./h3.md#h3togeo)
- [h3ToGeoBoundary](./h3.md#h3togeoboundary)
- [h3kRing](./h3.md#h3kring)
- [h3GetBaseCell](./h3.md#h3getbasecell)
- [h3HexAreaM2](./h3.md#h3hexaream2)
- [h3HexAreaKm2](./h3.md#h3hexareakm2)
- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors)
- [h3ToChildren](./h3.md#h3tochildren)
- [h3ToParent](./h3.md#h3toparent)
- [h3ToString](./h3.md#h3tostring)
- [stringToH3](./h3.md#stringtoh3)
- [h3GetResolution](./h3.md#h3getresolution)
- [h3IsResClassIII](./h3.md#h3isresclassiii)
- [h3IsPentagon](./h3.md#h3ispentagon)
- [h3GetFaces](./h3.md#h3getfaces)
- [h3CellAreaM2](./h3.md#h3cellaream2)
- [h3CellAreaRads2](./h3.md#h3cellarearads2)
- [h3ToCenterChild](./h3.md#h3tocenterchild)
- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm)
- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm)
- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads)
- [h3NumHexagons](./h3.md#h3numhexagons)
- [h3Line](./h3.md#h3line)
- [h3Distance](./h3.md#h3distance)
- [h3HexRing](./h3.md#h3hexring)
- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge)
- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid)
- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge)
- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge)
- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge)
- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon)
- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary)

## S2 Index Functions

- [geoToS2](./s2.md#geoToS2)
- [s2ToGeo](./s2.md#s2ToGeo)
- [s2GetNeighbors](./s2.md#s2GetNeighbors)
- [s2CellsIntersect](./s2.md#s2CellsIntersect)
- [s2CapContains](./s2.md#s2CapContains)
- [s2CapUnion](./s2.md#s2CapUnion)
- [s2RectAdd](./s2.md#s2RectAdd)
- [s2RectContains](./s2.md#s2RectContains)
- [s2RectUinion](./s2.md#s2RectUinion)
- [s2RectIntersection](./s2.md#s2RectIntersection)
- [geoToS2](./s2.md#geotos2)
- [s2ToGeo](./s2.md#s2togeo)
- [s2GetNeighbors](./s2.md#s2getneighbors)
- [s2CellsIntersect](./s2.md#s2cellsintersect)
- [s2CapContains](./s2.md#s2capcontains)
- [s2CapUnion](./s2.md#s2capunion)
- [s2RectAdd](./s2.md#s2rectadd)
- [s2RectContains](./s2.md#s2rectcontains)
- [s2RectUnion](./s2.md#s2rectunion)
- [s2RectIntersection](./s2.md#s2rectintersection)


[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/) <!--hide-->
@@ -593,6 +593,27 @@ LIMIT 10
└────────────────┴─────────┘
```

## formatReadableDecimalSize(x)

Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.

Example:

``` sql
SELECT
    arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
    formatReadableDecimalSize(filesize_bytes) AS filesize
```

``` text
┌─filesize_bytes─┬─filesize───┐
│              1 │ 1.00 B     │
│           1024 │ 1.02 KB    │
│        1048576 │ 1.05 MB    │
│      192851925 │ 192.85 MB  │
└────────────────┴────────────┘
```

## formatReadableSize(x)

Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string.
@@ -571,13 +571,13 @@ Similar to base58Decode, but returns an empty string in case of error.

## base64Encode(s)

Encodes ‘s’ string into base64
Encodes ‘s’ FixedString or String into base64.

Alias: `TO_BASE64`.

## base64Decode(s)

Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception.
Decode base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception.

Alias: `FROM_BASE64`.
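Editor's note (illustrative round-trip, not part of the diff): the two functions are inverses, so encoding and then decoding returns the original string.

```sql
SELECT
    base64Encode('clickhouse') AS encoded,                     -- 'Y2xpY2tob3VzZQ=='
    base64Decode(base64Encode('clickhouse')) AS round_trip;    -- 'clickhouse'
```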
@@ -6,28 +6,29 @@ sidebar_label: For Replacing in Strings

# Functions for Searching and Replacing in Strings

:::note
:::note
Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately.
:::

## replaceOne(haystack, pattern, replacement)

Replaces the first occurrence, if it exists, of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
Hereafter, ‘pattern’ and ‘replacement’ must be constants.
Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
‘pattern’ and ‘replacement’ must be constants.

## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)

Replaces all occurrences of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
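Editor's note (illustrative query, not part of the diff): a small comparison of the two functions on the same input.

```sql
SELECT
    replaceOne('Hello, World!', 'l', 'L') AS first_only,    -- 'HeLlo, World!'
    replaceAll('Hello, World!', 'l', 'L') AS every_match;   -- 'HeLLo, WorLd!'
```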
## replaceRegexpOne(haystack, pattern, replacement)

Replacement using the ‘pattern’ regular expression. A re2 regular expression.
Replaces only the first occurrence, if it exists.
A pattern can be specified as ‘replacement’. This pattern can include substitutions `\0-\9`.
The substitution `\0` includes the entire regular expression. Substitutions `\1-\9` correspond to the subpattern numbers.To use the `\` character in a template, escape it using `\`.
Also keep in mind that a string literal requires an extra escape.
Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string.
‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`.
Also keep in mind that string literals require an extra escaping.

Example 1. Converting the date to American format:
Example 1. Converting ISO dates to American format:

``` sql
SELECT DISTINCT
@@ -62,7 +63,7 @@ SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0')

## replaceRegexpAll(haystack, pattern, replacement)

This does the same thing, but replaces all the occurrences. Example:
Like ‘replaceRegexpOne‘, but replaces all occurrences of the pattern. Example:

``` sql
SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res
@@ -37,7 +37,7 @@ deb:
  contents:
    - src: root/etc/clickhouse-client/config.xml
      dst: /etc/clickhouse-client/config.xml
      type: config
      type: config|noreplace
    - src: root/usr/bin/clickhouse-benchmark
      dst: /usr/bin/clickhouse-benchmark
    - src: root/usr/bin/clickhouse-compressor

@@ -29,7 +29,7 @@ deb:
  contents:
    - src: root/etc/clickhouse-keeper/keeper_config.xml
      dst: /etc/clickhouse-keeper/keeper_config.xml
      type: config
      type: config|noreplace
    - src: root/usr/bin/clickhouse-keeper
      dst: /usr/bin/clickhouse-keeper
    # docs

@@ -44,10 +44,10 @@ deb:
  contents:
    - src: root/etc/clickhouse-server/config.xml
      dst: /etc/clickhouse-server/config.xml
      type: config
      type: config|noreplace
    - src: root/etc/clickhouse-server/users.xml
      dst: /etc/clickhouse-server/users.xml
      type: config
      type: config|noreplace
    - src: clickhouse-server.init
      dst: /etc/init.d/clickhouse-server
    - src: clickhouse-server.service
@@ -1088,7 +1088,8 @@ void Client::processConfig()
    }
    else
    {
        need_render_progress = config().getBool("progress", false);
        std::string progress = config().getString("progress", "tty");
        need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
        echo_queries = config().getBool("echo", false);
        ignore_error = config().getBool("ignore-error", false);

@@ -489,7 +489,8 @@ void LocalServer::processConfig()
    }
    else
    {
        need_render_progress = config().getBool("progress", false);
        std::string progress = config().getString("progress", "tty");
        need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
        echo_queries = config().hasOption("echo") || config().hasOption("verbose");
        ignore_error = config().getBool("ignore-error", false);
        is_multiquery = true;
@@ -1336,17 +1336,13 @@
         name - name for the rule (optional)
         regexp - RE2 compatible regular expression (mandatory)
         replace - substitution string for sensitive data (optional, by default - six asterisks)
    -->
    <query_masking_rules>
        <rule>
            <name>hide encrypt/decrypt arguments</name>
            <regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
            <!-- or more secure, but also more invasive:
                (aes_\w+)\s*\(.*\)
            -->
            <replace>\1(???)</replace>
        </rule>
    </query_masking_rules>
    </query_masking_rules> -->

    <!-- Uncomment to use custom http handlers.
        rules are checked from top to bottom, first match runs the handler
@@ -18,8 +18,10 @@ AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string
    assertNoParameters(name, parameters);
    assertBinary(name, arguments);

    if (!isNumber(arguments[0]) || !isNumber(arguments[1]))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical types", name);
    if (!isNumber(arguments[0]))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical argument types", name);
    if (!WhichDataType(arguments[1]).isNativeUInt())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument of aggregate function {} should be a native unsigned integer", name);

    return std::make_shared<AggregateFunctionAnalysisOfVariance>(arguments, parameters);
}
@@ -77,7 +77,7 @@ public:
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        auto f_stat = data(place).getFStatistic();
        if (std::isinf(f_stat) || isNaN(f_stat))
        if (std::isinf(f_stat) || isNaN(f_stat) || f_stat < 0)
            throw Exception("F statistic is not defined or infinite for these arguments", ErrorCodes::BAD_ARGUMENTS);

        auto p_value = data(place).getPValue(f_stat);
@ -482,6 +482,8 @@ struct ZTestMoments
|
||||
template <typename T>
|
||||
struct AnalysisOfVarianceMoments
|
||||
{
|
||||
constexpr static size_t MAX_GROUPS_NUMBER = 1024 * 1024;
|
||||
|
||||
/// Sums of values within a group
|
||||
std::vector<T> xs1{};
|
||||
/// Sums of squared values within a group
|
||||
@ -494,6 +496,10 @@ struct AnalysisOfVarianceMoments
|
||||
if (xs1.size() >= possible_size)
|
||||
return;
|
||||
|
||||
if (possible_size > MAX_GROUPS_NUMBER)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too many groups for analysis of variance (should be no more than {}, got {})",
|
||||
MAX_GROUPS_NUMBER, possible_size);
|
||||
|
||||
xs1.resize(possible_size, 0.0);
|
||||
xs2.resize(possible_size, 0.0);
|
||||
ns.resize(possible_size, 0);
|
||||
|
@ -126,6 +126,7 @@ BackupWriterS3::BackupWriterS3(
|
||||
, max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries)
|
||||
, read_settings(context_->getReadSettings())
|
||||
, rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings)
|
||||
, log(&Poco::Logger::get("BackupWriterS3"))
|
||||
{
|
||||
rw_settings.updateFromSettingsIfEmpty(context_->getSettingsRef());
|
||||
}
|
||||
@ -146,9 +147,12 @@ void BackupWriterS3::copyObjectImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head,
|
||||
std::optional<ObjectAttributes> metadata) const
|
||||
const Aws::S3::Model::HeadObjectResult & head,
|
||||
const std::optional<ObjectAttributes> & metadata) const
|
||||
{
|
||||
size_t size = head.GetContentLength();
|
||||
LOG_TRACE(log, "Copying {} bytes using single-operation copy", size);
|
||||
|
||||
Aws::S3::Model::CopyObjectRequest request;
|
||||
request.SetCopySource(src_bucket + "/" + src_key);
|
||||
request.SetBucket(dst_bucket);
|
||||
@ -186,13 +190,11 @@ void BackupWriterS3::copyObjectMultipartImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head,
|
||||
std::optional<ObjectAttributes> metadata) const
const Aws::S3::Model::HeadObjectResult & head,
const std::optional<ObjectAttributes> & metadata) const
{
if (!head)
head = requestObjectHeadData(src_bucket, src_key).GetResult();
size_t size = head->GetContentLength();
size_t size = head.GetContentLength();
LOG_TRACE(log, "Copying {} bytes using multipart upload copy", size);
String multipart_upload_id;
@ -213,16 +215,20 @@ void BackupWriterS3::copyObjectMultipartImpl(
std::vector<String> part_tags;
size_t position = 0;
size_t upload_part_size = rw_settings.min_upload_part_size;
for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
for (size_t part_number = 1; position < size; ++part_number)
{
size_t next_position = std::min(position + upload_part_size, size);
Aws::S3::Model::UploadPartCopyRequest part_request;
part_request.SetCopySource(src_bucket + "/" + src_key);
part_request.SetBucket(dst_bucket);
part_request.SetKey(dst_key);
part_request.SetUploadId(multipart_upload_id);
part_request.SetPartNumber(static_cast<int>(part_number));
part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1));
part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, next_position - 1));
auto outcome = client->UploadPartCopy(part_request);
if (!outcome.IsSuccess())
@ -239,6 +245,14 @@ void BackupWriterS3::copyObjectMultipartImpl(
auto etag = outcome.GetResult().GetCopyPartResult().GetETag();
part_tags.push_back(etag);
position = next_position;
if (part_number % rw_settings.upload_part_size_multiply_parts_count_threshold == 0)
{
upload_part_size *= rw_settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, rw_settings.max_upload_part_size);
}
}
{
@ -280,15 +294,14 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
auto file_path = fs::path(s3_uri.key) / file_name_to;
auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult();
static constexpr int64_t multipart_upload_threashold = 5UL * 1024 * 1024 * 1024;
if (head.GetContentLength() >= multipart_upload_threashold)
if (static_cast<size_t>(head.GetContentLength()) < rw_settings.max_single_operation_copy_size)
{
copyObjectMultipartImpl(
copyObjectImpl(
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
}
else
{
copyObjectImpl(
copyObjectMultipartImpl(
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
}
}
@ -61,7 +61,6 @@ public:
void copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to) override;
private:
Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const;
void copyObjectImpl(
@ -69,22 +68,23 @@ private:
const String & src_key,
const String & dst_bucket,
const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
std::optional<ObjectAttributes> metadata = std::nullopt) const;
const Aws::S3::Model::HeadObjectResult & head,
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
void copyObjectMultipartImpl(
const String & src_bucket,
const String & src_key,
const String & dst_bucket,
const String & dst_key,
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
std::optional<ObjectAttributes> metadata = std::nullopt) const;
const Aws::S3::Model::HeadObjectResult & head,
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
S3::URI s3_uri;
std::shared_ptr<Aws::S3::S3Client> client;
UInt64 max_single_read_retries;
ReadSettings read_settings;
S3Settings::ReadWriteSettings rw_settings;
Poco::Logger * log;
};
}
@ -138,12 +138,12 @@ private:
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::READ)
@ -158,7 +158,7 @@ BackupImpl::BackupImpl(
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -166,7 +166,7 @@ BackupImpl::BackupImpl(
bool is_internal_backup_,
const std::shared_ptr<IBackupCoordination> & coordination_,
const std::optional<UUID> & backup_uuid_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::WRITE)
@ -225,10 +225,19 @@ void BackupImpl::open(const ContextPtr & context)
base_backup = BackupFactory::instance().createBackup(params);
if (open_mode == OpenMode::WRITE)
{
base_backup_uuid = base_backup->getUUID();
}
else if (base_backup_uuid != base_backup->getUUID())
throw Exception(ErrorCodes::WRONG_BASE_BACKUP, "Backup {}: The base backup {} has different UUID ({} != {})",
backup_name, base_backup->getName(), toString(base_backup->getUUID()), (base_backup_uuid ? toString(*base_backup_uuid) : ""));
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: The base backup {} has different UUID ({} != {})",
backup_name_for_logging,
base_backup->getNameForLogging(),
toString(base_backup->getUUID()),
(base_backup_uuid ? toString(*base_backup_uuid) : ""));
}
}
}
@ -349,14 +358,14 @@ void BackupImpl::readBackupMetadata()
if (use_archives)
{
if (!reader->fileExists(archive_params.archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
setCompressedSize();
in = getArchiveReader("")->readFile(".backup");
}
else
{
if (!reader->fileExists(".backup"))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
in = reader->readFile(".backup");
}
@ -369,7 +378,8 @@ void BackupImpl::readBackupMetadata()
version = config->getInt("version");
if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION))
throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version);
throw Exception(
ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name_for_logging, version);
timestamp = parse<LocalDateTime>(config->getString("timestamp")).to_time_t();
uuid = parse<UUID>(config->getString("uuid"));
@ -400,7 +410,13 @@ void BackupImpl::readBackupMetadata()
use_base = true;
if (info.base_size > info.size)
throw Exception(ErrorCodes::BACKUP_DAMAGED, "Backup {}: Base size must not be greater than the size of entry {}", backup_name, quoteString(info.file_name));
{
throw Exception(
ErrorCodes::BACKUP_DAMAGED,
"Backup {}: Base size must not be greater than the size of entry {}",
backup_name_for_logging,
quoteString(info.file_name));
}
if (use_base)
{
@ -436,14 +452,14 @@ void BackupImpl::checkBackupDoesntExist() const
file_name_to_check_existence = ".backup";
if (writer->fileExists(file_name_to_check_existence))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name_for_logging);
/// Check that no other backup (excluding internal backups) is writing to the same destination.
if (!is_internal_backup)
{
assert(!lock_file_name.empty());
if (writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name_for_logging);
}
}
@ -466,8 +482,16 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const
if (throw_if_failed)
{
if (!writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Lock file {} suddenly disappeared while writing backup {}", lock_file_name, backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name);
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Lock file {} suddenly disappeared while writing backup {}",
lock_file_name,
backup_name_for_logging);
}
throw Exception(
ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name_for_logging);
}
return false;
}
@ -514,8 +538,13 @@ UInt64 BackupImpl::getFileSize(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->size;
}
@ -525,8 +554,13 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->checksum;
}
@ -536,8 +570,13 @@ SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) con
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return {info->size, info->checksum};
}
@ -560,8 +599,13 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
auto info_opt = coordination->getFileInfo(size_and_checksum);
if (!info_opt)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, formatSizeAndChecksum(size_and_checksum));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
formatSizeAndChecksum(size_and_checksum));
}
const auto & info = *info_opt;
@ -577,7 +621,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::NO_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
if (!base_backup->fileExists(std::pair(info.base_size, info.base_checksum)))
@ -585,7 +629,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
auto base_entry = base_backup->readFile(std::pair{info.base_size, info.base_checksum});
@ -695,9 +739,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
LOG_TRACE(log, "Writing backup for file {} from {}", file_name, from_file_name);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name_for_logging, quoteString(file_name));
}
FileInfo info
{
@ -893,12 +940,12 @@ void BackupImpl::finalizeWriting()
if (!is_internal_backup)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
LOG_TRACE(log, "Finalizing backup {}", backup_name_for_logging);
writeBackupMetadata();
closeArchives();
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name);
LOG_TRACE(log, "Finalized backup {}", backup_name_for_logging);
}
writing_finalized = true;
@ -971,7 +1018,7 @@ void BackupImpl::removeAllFilesAfterFailure()
try
{
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name);
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name_for_logging);
Strings files_to_remove;
if (use_archives)
@ -35,14 +35,14 @@ public:
};
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_);
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -53,7 +53,7 @@ public:
~BackupImpl() override;
const String & getName() const override { return backup_name; }
const String & getNameForLogging() const override { return backup_name_for_logging; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
@ -107,7 +107,7 @@ private:
/// Calculates and sets `compressed_size`.
void setCompressedSize();
const String backup_name;
const String backup_name_for_logging;
const ArchiveParams archive_params;
const bool use_archives;
const OpenMode open_mode;
@ -6,6 +6,7 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
namespace DB
@ -92,4 +93,10 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
String BackupInfo::toStringForLogging(const ContextPtr & context) const
{
ASTPtr ast = toAST();
return maskSensitiveInfoInBackupNameForLogging(serializeAST(*ast), ast, context);
}
}
@ -1,6 +1,7 @@
#pragma once
#include <Core/Field.h>
#include <Interpreters/Context_fwd.h>
namespace DB
@ -20,6 +21,8 @@ struct BackupInfo
ASTPtr toAST() const;
static BackupInfo fromAST(const IAST & ast);
String toStringForLogging(const ContextPtr & context) const;
};
}
@ -166,9 +166,10 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
String backup_name_for_logging = backup_info.toStringForLogging(context);
try
{
addInfo(backup_id, backup_info.toString(), backup_settings.internal, BackupStatus::CREATING_BACKUP);
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::CREATING_BACKUP);
/// Prepare context to use.
ContextPtr context_in_use = context;
@ -184,13 +185,14 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_query, backup_id, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context]
[this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context]
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -202,8 +204,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -214,7 +217,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
catch (...)
{
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
@ -226,8 +229,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
void BackupsWorker::doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
BackupSettings backup_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
@ -336,7 +340,7 @@ void BackupsWorker::doBackup(
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
}
@ -345,7 +349,7 @@ void BackupsWorker::doBackup(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
@ -384,7 +388,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
try
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
addInfo(restore_id, backup_info.toString(), restore_settings.internal, BackupStatus::RESTORING);
String backup_name_for_logging = backup_info.toStringForLogging(context);
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
@ -399,12 +404,14 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, restore_settings, backup_info, restore_coordination, context_in_use] {
[this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
{
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ true);
@ -415,8 +422,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ false);
@ -437,8 +445,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
void BackupsWorker::doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
RestoreSettings restore_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
@ -541,7 +550,7 @@ void BackupsWorker::doRestore(
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
setStatus(restore_id, BackupStatus::RESTORED);
}
catch (...)
@ -549,7 +558,7 @@ void BackupsWorker::doRestore(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}
@ -76,14 +76,28 @@ public:
private:
OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void doBackup(const std::shared_ptr<ASTBackupQuery> & backup_query, const OperationID & backup_id, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
void doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const std::shared_ptr<ASTBackupQuery> & restore_query, const OperationID & restore_id, RestoreSettings restore_settings, const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context, bool called_async);
void doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
@ -19,7 +19,8 @@ public:
virtual ~IBackup() = default;
/// Name of the backup.
virtual const String & getName() const = 0;
//virtual const String & getName() const = 0;
virtual const String & getNameForLogging() const = 0;
enum class OpenMode
{
@ -47,7 +47,7 @@ void registerBackupEngineS3(BackupFactory & factory)
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AWS_S3
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
@ -111,12 +111,12 @@ void registerBackupEngineS3(BackupFactory & factory)
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
auto writer = std::make_shared<BackupWriterS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
#else
throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
@ -99,7 +99,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
auto creator_fn = [](const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & engine_name = params.backup_info.backup_engine_name;
if (!params.backup_info.id_arg.empty())
@ -172,7 +172,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
reader = std::make_shared<BackupReaderFile>(path);
else
reader = std::make_shared<BackupReaderDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
@ -181,7 +181,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
writer = std::make_shared<BackupWriterFile>(path);
else
writer = std::make_shared<BackupWriterDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
};
@ -65,10 +65,12 @@
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/ProfileEventsExt.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
#include <IO/ForkWriteBuffer.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace fs = std::filesystem;
@ -103,6 +105,7 @@ namespace ErrorCodes
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
}
}
@ -116,6 +119,25 @@ namespace ProfileEvents
namespace DB
{
std::istream& operator>> (std::istream & in, ProgressOption & progress)
{
std::string token;
in >> token;
boost::to_upper(token);
if (token == "OFF" || token == "FALSE" || token == "0" || token == "NO")
progress = ProgressOption::OFF;
else if (token == "TTY" || token == "ON" || token == "TRUE" || token == "1" || token == "YES")
progress = ProgressOption::TTY;
else if (token == "ERR")
progress = ProgressOption::ERR;
else
throw boost::program_options::validation_error(boost::program_options::validation_error::invalid_option_value);
return in;
}
static ClientInfo::QueryKind parseQueryKind(const String & query_kind)
{
if (query_kind == "initial_query")
@ -413,8 +435,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput(*tty_buf);
try
{
@ -431,11 +453,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
output_format->flush();
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
if (need_render_progress && tty_buf)
{
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
progress_indication.writeProgress();
progress_indication.writeProgress(*tty_buf);
}
}
@ -443,7 +465,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
void ClientBase::onLogData(Block & block)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeLogs(block);
logs_out_stream->flush();
}
@ -639,6 +662,58 @@ void ClientBase::initLogsOutputStream()
}
}
void ClientBase::initTtyBuffer(bool to_err)
{
if (!tty_buf)
{
static constexpr auto tty_file_name = "/dev/tty";
/// Output all progress bar commands to terminal at once to avoid flicker.
/// This size is usually greater than the window size.
static constexpr size_t buf_size = 1024;
if (!to_err)
{
std::error_code ec;
std::filesystem::file_status tty = std::filesystem::status(tty_file_name, ec);
if (!ec && exists(tty) && is_character_file(tty)
&& (tty.permissions() & std::filesystem::perms::others_write) != std::filesystem::perms::none)
{
try
{
tty_buf = std::make_unique<WriteBufferFromFile>(tty_file_name, buf_size);
/// It is possible that the terminal file has writeable permissions
/// but we cannot write anything there. Check it with invisible character.
tty_buf->write('\0');
tty_buf->next();
return;
}
catch (const Exception & e)
{
if (tty_buf)
tty_buf.reset();
if (e.code() != ErrorCodes::CANNOT_OPEN_FILE)
throw;
/// It is normal if file exists, indicated as writeable but still cannot be opened.
/// Fallback to other options.
}
}
}
if (stderr_is_a_tty)
{
tty_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDERR_FILENO, buf_size);
}
else
need_render_progress = false;
}
}
void ClientBase::updateSuggest(const ASTPtr & ast)
{
std::vector<std::string> new_words;
@ -937,14 +1012,15 @@ void ClientBase::onProgress(const Progress & value)
if (output_format)
output_format->onProgress(value);
if (need_render_progress)
progress_indication.writeProgress();
if (need_render_progress && tty_buf)
progress_indication.writeProgress(*tty_buf);
}
void ClientBase::onEndOfStream()
{
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
if (output_format)
output_format->finalize();
@ -952,10 +1028,7 @@ void ClientBase::onEndOfStream()
resetOutput();
if (is_interactive && !written_first_block)
{
progress_indication.clearProgressOutput();
std::cout << "Ok." << std::endl;
}
}
@ -998,15 +1071,16 @@ void ClientBase::onProfileEvents(Block & block)
}
progress_indication.updateThreadEventData(thread_times);
if (need_render_progress)
progress_indication.writeProgress();
if (need_render_progress && tty_buf)
progress_indication.writeProgress(*tty_buf);
if (profile_events.print)
{
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeProfileEvents(block);
logs_out_stream->flush();
@ -1180,7 +1254,8 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
progress_indication.updateProgress(Progress(file_progress));
/// Set callback to be called on file progress.
progress_indication.setFileProgressCallback(global_context, true);
if (tty_buf)
progress_indication.setFileProgressCallback(global_context, *tty_buf);
}
/// If data fetched from file (maybe compressed file)
@ -1432,12 +1507,12 @@ bool ClientBase::receiveEndOfQuery()
void ClientBase::cancelQuery()
{
connection->sendCancel();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
if (is_interactive)
{
progress_indication.clearProgressOutput();
std::cout << "Cancelling query." << std::endl;
}
cancelled = true;
}
@ -1557,7 +1632,8 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (profile_events.last_block)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeProfileEvents(profile_events.last_block);
logs_out_stream->flush();
@ -2248,7 +2324,7 @@ void ClientBase::init(int argc, char ** argv)
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
("query_id", po::value<std::string>(), "query_id")
("progress", "print progress of queries execution")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::TTY, "tty"), "Print progress of queries execution - to TTY (default): tty|on|1|true|yes; to STDERR: err; OFF: off|0|false|no")
("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
@ -2303,6 +2379,11 @@ void ClientBase::init(int argc, char ** argv)
parseAndCheckOptions(options_description, options, common_arguments);
po::notify(options);
if (options["progress"].as<ProgressOption>() == ProgressOption::OFF)
need_render_progress = false;
else
initTtyBuffer(options["progress"].as<ProgressOption>() == ProgressOption::ERR);
if (options.count("version") || options.count("V"))
{
showClientVersion();
@ -2353,7 +2434,20 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("profile-events-delay-ms"))
config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
if (options.count("progress"))
config().setBool("progress", true);
{
switch (options["progress"].as<ProgressOption>())
{
case OFF:
config().setString("progress", "off");
break;
case TTY:
config().setString("progress", "tty");
break;
case ERR:
config().setString("progress", "err");
break;
}
}
if (options.count("echo"))
config().setBool("echo", true);
if (options.count("disable_suggestion"))
@ -15,6 +15,7 @@
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
namespace po = boost::program_options;
@ -35,9 +36,18 @@ enum MultiQueryProcessingStage
PARSING_FAILED,
};
enum ProgressOption
{
OFF,
TTY,
ERR,
};
std::istream& operator>> (std::istream & in, ProgressOption & progress);
void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
{
@ -143,6 +153,7 @@ private:
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();
void initTtyBuffer(bool to_err = false);
String prompt() const;
@ -218,6 +229,10 @@ protected:
String server_logs_file;
std::unique_ptr<InternalTextLogs> logs_out_stream;
/// /dev/tty if accessible or std::cerr - for progress bar.
/// We prefer to output progress bar directly to tty to allow user to redirect stdout and stderr and still get the progress indication.
std::unique_ptr<WriteBufferFromFileDescriptor> tty_buf;
String home_path;
String history_file; /// Path to a file containing command history.
@ -12,6 +12,7 @@
M(FailedQuery, "Number of failed queries.") \
M(FailedSelectQuery, "Same as FailedQuery, but only for SELECT queries.") \
M(FailedInsertQuery, "Same as FailedQuery, but only for INSERT queries.") \
M(FailedAsyncInsertQuery, "Number of failed ASYNC INSERT queries.") \
M(QueryTimeMicroseconds, "Total time of all queries.") \
M(SelectQueryTimeMicroseconds, "Total time of SELECT queries.") \
M(InsertQueryTimeMicroseconds, "Total time of INSERT queries.") \
@ -2,6 +2,7 @@
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <filesystem>
#include <cmath>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <base/types.h>
@ -11,6 +12,9 @@
#include "IO/WriteBufferFromString.h"
#include <Databases/DatabaseMemory.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
#define CLEAR_TO_END_OF_LINE "\033[K"
namespace
{
@ -44,15 +48,6 @@ bool ProgressIndication::updateProgress(const Progress & value)
return progress.incrementPiecewiseAtomically(value);
}
void ProgressIndication::clearProgressOutput()
{
if (written_progress_chars)
{
written_progress_chars = 0;
std::cerr << "\r" CLEAR_TO_END_OF_LINE;
}
}
void ProgressIndication::resetProgress()
{
watch.restart();
@ -67,15 +62,12 @@ void ProgressIndication::resetProgress()
}
}
void ProgressIndication::setFileProgressCallback(ContextMutablePtr context, bool write_progress_on_update_)
void ProgressIndication::setFileProgressCallback(ContextMutablePtr context, WriteBufferFromFileDescriptor & message)
{
write_progress_on_update = write_progress_on_update_;
context->setFileProgressCallback([&](const FileProgress & file_progress)
{
progress.incrementPiecewiseAtomically(Progress(file_progress));
if (write_progress_on_update)
writeProgress();
writeProgress(message);
});
}
@ -142,13 +134,10 @@ void ProgressIndication::writeFinalProgress()
std::cout << ". ";
}
void ProgressIndication::writeProgress()
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
{
std::lock_guard lock(progress_mutex);
/// Output all progress bar commands to stderr at once to avoid flicker.
WriteBufferFromFileDescriptor message(STDERR_FILENO, 1024);
static size_t increment = 0;
static const char * indicators[8] = {
"\033[1;30m→\033[0m",
@ -307,4 +296,14 @@ void ProgressIndication::writeProgress()
message.next();
}
void ProgressIndication::clearProgressOutput(WriteBufferFromFileDescriptor & message)
{
if (written_progress_chars)
{
written_progress_chars = 0;
message << "\r" CLEAR_TO_END_OF_LINE;
message.next();
}
}
}
@ -9,12 +9,12 @@
#include <Common/Stopwatch.h>
#include <Common/EventRateMeter.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
#define CLEAR_TO_END_OF_LINE "\033[K"
namespace DB
{
class WriteBufferFromFileDescriptor;
struct ThreadEventData
{
UInt64 time() const noexcept { return user_ms + system_ms; }
@ -30,14 +30,13 @@ using HostToThreadTimesMap = std::unordered_map<String, ThreadIdToTimeMap>;
class ProgressIndication
{
public:
/// Write progress to stderr.
void writeProgress();
/// Write progress bar.
void writeProgress(WriteBufferFromFileDescriptor & message);
void clearProgressOutput(WriteBufferFromFileDescriptor & message);
/// Write summary.
void writeFinalProgress();
/// Clear stderr output.
void clearProgressOutput();
/// Reset progress values.
void resetProgress();
@ -52,7 +51,7 @@ public:
/// In some cases there is a need to update progress value, when there is no access to progress_inidcation object.
/// In this case it is added via context.
/// `write_progress_on_update` is needed to write progress for loading files data via pipe in non-interactive mode.
void setFileProgressCallback(ContextMutablePtr context, bool write_progress_on_update = false);
void setFileProgressCallback(ContextMutablePtr context, WriteBufferFromFileDescriptor & message);
/// How much seconds passed since query execution start.
double elapsedSeconds() const { return getElapsedNanoseconds() / 1e9; }
@ -3,10 +3,10 @@
#include <cstring>
#include <iostream>
#include <Core/Defines.h>
#include <Common/Stopwatch.h>
#include <Common/TargetSpecific.h>
#include <base/types.h>
#include <base/unaligned.h>
#include <Common/Stopwatch.h>
#include <Common/TargetSpecific.h>
#ifdef __SSE2__
#include <emmintrin.h>
@ -599,6 +599,9 @@ bool NO_INLINE decompressImpl(const char * const source, char * const dest, size
copy_end = op + length;
if (unlikely(copy_end > output_end))
return false;
/** Here we can write up to copy_amount - 1 - 4 * 2 bytes after buffer.
* The worst case when offset = 1 and length = 4
*/
@ -1,8 +1,5 @@
#include <Compression/CompressionFactory.h>
#include <Common/PODArray.h>
#include <Common/Stopwatch.h>
#include <base/types.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadBufferFromMemory.h>
@ -10,6 +7,12 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <base/types.h>
#include <Common/PODArray.h>
#include <Common/Stopwatch.h>
#include <Compression/LZ4_decompress_faster.h>
#include <IO/BufferWithOwnMemory.h>
#include <random>
#include <bitset>
@ -1319,4 +1322,34 @@ INSTANTIATE_TEST_SUITE_P(Gorilla,
// ),
//);
TEST(LZ4Test, DecompressMalformedInput)
{
/// This malformed input was initially found by lz4_decompress_fuzzer and causes failure under UBSAN.
constexpr unsigned char data[]
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
0x00, 0x20, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0xff, 0xff, 0xff, 0x17, 0xff, 0xff, 0x0f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xfe, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const char * const source = reinterpret_cast<const char * const>(data);
const uint32_t source_size = std::size(data);
constexpr uint32_t uncompressed_size = 80;
DB::Memory<> memory;
memory.resize(ICompressionCodec::getHeaderSize() + uncompressed_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
unalignedStoreLE<uint8_t>(memory.data(), static_cast<uint8_t>(CompressionMethodByte::LZ4));
unalignedStoreLE<uint32_t>(&memory[1], source_size);
unalignedStoreLE<uint32_t>(&memory[5], uncompressed_size);
auto codec = CompressionCodecFactory::instance().get("LZ4", {});
ASSERT_THROW(codec->decompress(source, source_size, memory.data()), Exception);
}
}
@ -377,6 +377,9 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
{
assert(deltas.empty() || deltas.front().zxid >= commit_zxid);
// collect nodes that have no further modification in the current transaction
std::unordered_set<std::string> modified_nodes;
while (!deltas.empty() && deltas.front().zxid == commit_zxid)
{
if (std::holds_alternative<SubDeltaEnd>(deltas.front().operation))
@ -393,7 +396,17 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
assert(path_deltas.front() == &front_delta);
path_deltas.pop_front();
if (path_deltas.empty())
{
deltas_for_path.erase(front_delta.path);
// no more deltas for path -> no modification
modified_nodes.insert(std::move(front_delta.path));
}
else if (path_deltas.front()->zxid > commit_zxid)
{
// next delta has a zxid from a different transaction -> no modification in this transaction
modified_nodes.insert(std::move(front_delta.path));
}
}
else if (auto * add_auth = std::get_if<AddAuthDelta>(&front_delta.operation))
{
@ -409,9 +422,12 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
}
// delete all cached nodes that were not modified after the commit_zxid
// the commit can end on SubDeltaEnd so we don't want to clear cached nodes too soon
if (deltas.empty() || deltas.front().zxid > commit_zxid)
std::erase_if(nodes, [commit_zxid](const auto & node) { return node.second.zxid == commit_zxid; });
// we only need to check the nodes that were modified in this transaction
for (const auto & node : modified_nodes)
{
if (nodes[node].zxid == commit_zxid)
nodes.erase(node);
}
}
void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)
@ -84,11 +84,12 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr)
void SerializationString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnString & column_string = typeid_cast<const ColumnString &>(column);
const auto & full_column = column.convertToFullColumnIfLowCardinality();
const ColumnString & column_string = typeid_cast<const ColumnString &>(*full_column);
const ColumnString::Chars & data = column_string.getChars();
const ColumnString::Offsets & offsets = column_string.getOffsets();
size_t size = column.size();
size_t size = column_string.size();
if (!size)
return;
@ -62,11 +62,12 @@ std::pair<String, StoragePtr> createTableFromAST(
if (ast_create_query.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
auto table_function = factory.get(ast_create_query.as_table_function, context);
auto table_function_ast = ast_create_query.as_table_function->ptr();
auto table_function = factory.get(table_function_ast, context);
ColumnsDescription columns;
if (ast_create_query.columns_list && ast_create_query.columns_list->columns)
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
StoragePtr storage = table_function->execute(ast_create_query.as_table_function, context, ast_create_query.getTable(), std::move(columns));
StoragePtr storage = table_function->execute(table_function_ast, context, ast_create_query.getTable(), std::move(columns));
storage->renameInMemory(ast_create_query);
return {ast_create_query.getTable(), storage};
}
@ -443,6 +443,11 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c
if (which.isArray())
return makeASTFunction("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
if (which.isDateTime64())
{
return makeASTFunction("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
}
return std::make_shared<ASTIdentifier>(data_type->getName());
}
@ -2,21 +2,19 @@
#include "config.h"
#if USE_BASE64
# include <Columns/ColumnConst.h>
# include <Common/MemorySanitizer.h>
# include <Columns/ColumnFixedString.h>
# include <Columns/ColumnString.h>
# include <DataTypes/DataTypeString.h>
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionHelpers.h>
# include <Functions/GatherUtils/Algorithms.h>
# include <IO/WriteHelpers.h>
# include <Functions/IFunction.h>
# include <Interpreters/Context_fwd.h>
# include <turbob64.h>
# include <Common/MemorySanitizer.h>
# include <span>
namespace DB
{
using namespace GatherUtils;
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
@ -25,33 +23,86 @@ namespace ErrorCodes
extern const int INCORRECT_DATA;
}
namespace Detail
{
inline size_t base64Decode(const std::span<const UInt8> src, UInt8 * dst)
{
# if defined(__aarch64__)
return tb64sdec(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# else
return _tb64d(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# endif
}
}
struct Base64Encode
{
static constexpr auto name = "base64Encode";
static size_t getBufferSize(size_t string_length, size_t string_count)
static size_t getBufferSize(const size_t string_length, const size_t string_count)
{
return ((string_length - string_count) / 3 + string_count) * 4 + string_count;
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
{
/*
* Some bug in sse arm64 implementation?
* `base64Encode(repeat('a', 46))` returns wrong padding character
*/
# if defined(__aarch64__)
return tb64senc(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# else
return _tb64e(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# endif
}
};
struct Base64Decode
{
static constexpr auto name = "base64Decode";
static size_t getBufferSize(size_t string_length, size_t string_count)
static size_t getBufferSize(const size_t string_length, const size_t string_count)
{
return ((string_length - string_count) / 4 + string_count) * 3 + string_count;
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
{
const auto outlen = Detail::base64Decode(src, dst);
if (src.size() > 0 && !outlen)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Failed to {} input '{}'",
name,
String(reinterpret_cast<const char *>(src.data()), src.size()));
return outlen;
}
};
struct TryBase64Decode
{
static constexpr auto name = "tryBase64Decode";
static size_t getBufferSize(size_t string_length, size_t string_count)
static size_t getBufferSize(const size_t string_length, const size_t string_count)
{
return Base64Decode::getBufferSize(string_length, string_count);
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
{
if (src.empty())
return 0;
const auto outlen = Detail::base64Decode(src, dst);
// during decoding character array can be partially polluted
// if fail, revert back and clean
if (!outlen)
*dst = 0;
return outlen;
}
};
template <typename Func>
@ -71,99 +122,60 @@ public:
if (arguments.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}: 1 expected.", getName());
if (!WhichDataType(arguments[0].type).isString())
if (!WhichDataType(arguments[0].type).isStringOrFixedString())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 1st argument of function {}. Must be String.",
arguments[0].type->getName(), getName());
"Illegal type {} of 1st argument of function {}. Must be FixedString or String.",
arguments[0].type->getName(),
getName());
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const override
{
const ColumnPtr column_string = arguments[0].column;
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
const auto & input_column = arguments[0].column;
if (const auto * src_column_as_fixed_string = checkAndGetColumn<ColumnFixedString>(*input_column))
return execute(*src_column_as_fixed_string, input_rows_count);
else if (const auto * src_column_as_string = checkAndGetColumn<ColumnString>(*input_column))
return execute(*src_column_as_string, input_rows_count);
if (!input)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}, must be of type String",
arguments[0].column->getName(), getName());
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of first argument of function {}, must be of type FixedString or String.",
input_column->getName(),
getName());
}
private:
static ColumnPtr execute(const ColumnString & src_column, const size_t src_row_count)
{
auto dst_column = ColumnString::create();
auto & dst_data = dst_column->getChars();
auto & dst_chars = dst_column->getChars();
auto & dst_offsets = dst_column->getOffsets();
size_t reserve = Func::getBufferSize(input->getChars().size(), input->size());
dst_data.resize(reserve);
dst_offsets.resize(input_rows_count);
const auto reserve = Func::getBufferSize(src_column.byteSize(), src_column.size());
dst_chars.resize(reserve);
dst_offsets.resize(src_row_count);
const ColumnString::Offsets & src_offsets = input->getOffsets();
const auto & src_chars = src_column.getChars();
const auto & src_offsets = src_column.getOffsets();
const auto * source = input->getChars().data();
auto * dst = dst_data.data();
auto * dst = dst_chars.data();
auto * dst_pos = dst;
const auto * src = src_chars.data();
size_t src_offset_prev = 0;
for (size_t row = 0; row < input_rows_count; ++row)
for (size_t row = 0; row < src_row_count; ++row)
{
size_t srclen = src_offsets[row] - src_offset_prev - 1;
size_t outlen = 0;
if constexpr (std::is_same_v<Func, Base64Encode>)
{
/*
* Some bug in sse arm64 implementation?
* `base64Encode(repeat('a', 46))` returns wrong padding character
*/
#if defined(__aarch64__)
outlen = tb64senc(reinterpret_cast<const uint8_t *>(source), srclen, reinterpret_cast<uint8_t *>(dst_pos));
#else
outlen = _tb64e(reinterpret_cast<const uint8_t *>(source), srclen, reinterpret_cast<uint8_t *>(dst_pos));
#endif
}
else if constexpr (std::is_same_v<Func, Base64Decode>)
{
if (srclen > 0)
{
#if defined(__aarch64__)
outlen = tb64sdec(reinterpret_cast<const uint8_t *>(source), srclen, reinterpret_cast<uint8_t *>(dst_pos));
#else
outlen = _tb64d(reinterpret_cast<const uint8_t *>(source), srclen, reinterpret_cast<uint8_t *>(dst_pos));
|
||||
#endif
|
||||
|
||||
if (!outlen)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"Failed to {} input '{}'",
|
||||
getName(), String(reinterpret_cast<const char *>(source), srclen));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (srclen > 0)
|
||||
{
|
||||
// during decoding character array can be partially polluted
|
||||
// if fail, revert back and clean
|
||||
auto * savepoint = dst_pos;
|
||||
outlen = _tb64d(reinterpret_cast<const uint8_t *>(source), srclen, reinterpret_cast<uint8_t *>(dst_pos));
|
||||
if (!outlen)
|
||||
{
|
||||
outlen = 0;
|
||||
dst_pos = savepoint; //-V1048
|
||||
// clean the symbol
|
||||
dst_pos[0] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
const size_t src_length = src_offsets[row] - src_offset_prev - 1;
|
||||
const auto outlen = Func::performCoding({src, src_length}, dst_pos);
|
||||
|
||||
/// The Base64 library uses AVX-512 with some shuffle operations.
/// MemorySanitizer cannot tell that uninitialized bytes in a SIMD register were never used in the shuffle result, so unpoison the output explicitly.
__msan_unpoison(dst_pos, outlen);
|
||||
|
||||
source += srclen + 1;
|
||||
src += src_length + 1;
|
||||
dst_pos += outlen;
|
||||
*dst_pos = '\0';
|
||||
dst_pos += 1;
|
||||
@ -172,8 +184,44 @@ public:
|
||||
src_offset_prev = src_offsets[row];
|
||||
}
|
||||
|
||||
dst_data.resize(dst_pos - dst);
|
||||
dst_chars.resize(dst_pos - dst);
|
||||
return dst_column;
|
||||
}
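Background note on the offsets arithmetic used in the loop above (mine, not from the patch): ColumnString stores each row's bytes back to back followed by a single zero byte, and offsets[row] points one past that zero. That is why the source length is offsets[row] - offsets[row - 1] - 1 and why a '\0' is appended after every encoded or decoded row before the new offset is recorded.

    // Hypothetical two-row ColumnString layout holding "ab" and "xyz".
    #include <cstddef>

    constexpr unsigned char chars[] = {'a', 'b', '\0', 'x', 'y', 'z', '\0'};
    constexpr size_t offsets[] = {3, 7};    // one past the terminating zero of each row

    static_assert(sizeof(chars) == offsets[1], "chars holds both rows and their terminators");
    static_assert(offsets[0] - 0 - 1 == 2, "row 0 payload is 'ab'");
    static_assert(offsets[1] - offsets[0] - 1 == 3, "row 1 payload is 'xyz'");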
|
||||
|
||||
static ColumnPtr execute(const ColumnFixedString & src_column, const size_t src_row_count)
|
||||
{
|
||||
auto dst_column = ColumnString::create();
|
||||
auto & dst_chars = dst_column->getChars();
|
||||
auto & dst_offsets = dst_column->getOffsets();
|
||||
|
||||
const auto reserve = Func::getBufferSize(src_column.byteSize(), src_column.size());
|
||||
dst_chars.resize(reserve);
|
||||
dst_offsets.resize(src_row_count);
|
||||
|
||||
const auto & src_chars = src_column.getChars();
|
||||
const auto & src_n = src_column.getN();
|
||||
|
||||
auto * dst = dst_chars.data();
|
||||
auto * dst_pos = dst;
|
||||
const auto * src = src_chars.data();
|
||||
|
||||
for (size_t row = 0; row < src_row_count; ++row)
|
||||
{
|
||||
const auto outlen = Func::performCoding({src, src_n}, dst_pos);
|
||||
|
||||
/// The Base64 library uses AVX-512 with some shuffle operations.
/// MemorySanitizer cannot tell that uninitialized bytes in a SIMD register were never used in the shuffle result, so unpoison the output explicitly.
__msan_unpoison(dst_pos, outlen);
|
||||
|
||||
src += src_n;
|
||||
dst_pos += outlen;
|
||||
*dst_pos = '\0';
|
||||
dst_pos += 1;
|
||||
|
||||
dst_offsets[row] = dst_pos - dst;
|
||||
}
|
||||
|
||||
dst_chars.resize(dst_pos - dst);
|
||||
return dst_column;
|
||||
}
|
||||
};
|
||||
|
@ -38,18 +38,21 @@ public:
|
||||
{
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}",
|
||||
arguments[0]->getName(), getName());
|
||||
|
||||
if (!isStringOrFixedString(arguments[1]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of second argument of function {}",
|
||||
arguments[1]->getName(), getName());
|
||||
|
||||
if (!isStringOrFixedString(arguments[2]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of third argument of function {}",
|
||||
arguments[2]->getName(), getName());
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
@ -61,7 +64,10 @@ public:
|
||||
const ColumnPtr column_replacement = arguments[2].column;
|
||||
|
||||
if (!isColumnConst(*column_needle) || !isColumnConst(*column_replacement))
|
||||
throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"2nd and 3rd arguments of function {} must be constants.",
|
||||
getName());
|
||||
|
||||
const IColumn * c1 = arguments[1].column.get();
|
||||
const IColumn * c2 = arguments[2].column.get();
|
||||
@ -71,7 +77,9 @@ public:
|
||||
String replacement = c2_const->getValue<String>();
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception("Length of the second argument of function replace must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
throw Exception(
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
|
||||
"Length of the second argument of function replace must be greater than 0.");
|
||||
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_src.get()))
|
||||
{
|
||||
@ -87,8 +95,9 @@ public:
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {} of first argument of function {}",
|
||||
arguments[0].column->getName(), getName());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -91,7 +91,7 @@ struct MultiMatchAllIndicesImpl
|
||||
hs_error_t err = hs_clone_scratch(regexps->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
||||
|
||||
@ -203,7 +203,7 @@ struct MultiMatchAllIndicesImpl
|
||||
hs_error_t err = hs_clone_scratch(regexps->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
||||
|
||||
|
@ -38,6 +38,7 @@ namespace ErrorCodes
|
||||
|
||||
namespace Regexps
|
||||
{
|
||||
|
||||
using Regexp = OptimizedRegularExpressionSingleThreaded;
|
||||
using RegexpPtr = std::shared_ptr<Regexp>;
|
||||
|
||||
@ -112,11 +113,11 @@ struct HyperscanDeleter
|
||||
};
|
||||
|
||||
/// Helper unique pointers to correctly delete the allocated space when hyperscan cannot compile something and we throw an exception.
|
||||
using CompilerError = std::unique_ptr<hs_compile_error_t, HyperscanDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
|
||||
using CompilerErrorPtr = std::unique_ptr<hs_compile_error_t, HyperscanDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
|
||||
using ScratchPtr = std::unique_ptr<hs_scratch_t, HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
|
||||
using DataBasePtr = std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>;
|
||||
|
||||
/// Database is thread safe across multiple threads and Scratch is not but we can copy it whenever we use it in the searcher.
|
||||
/// Database is immutable/thread-safe across multiple threads. Scratch is not but we can copy it whenever we use it in the searcher.
|
||||
class Regexps
|
||||
{
|
||||
public:
|
||||
@ -154,7 +155,7 @@ private:
|
||||
|
||||
using DeferredConstructedRegexpsPtr = std::shared_ptr<DeferredConstructedRegexps>;
|
||||
|
||||
template <bool save_indices, bool WithEditDistance>
|
||||
template <bool save_indices, bool with_edit_distance>
|
||||
inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[maybe_unused]] std::optional<UInt32> edit_distance)
|
||||
{
|
||||
/// Common pointers
|
||||
@ -168,7 +169,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
|
||||
patterns.reserve(str_patterns.size());
|
||||
flags.reserve(str_patterns.size());
|
||||
|
||||
if constexpr (WithEditDistance)
|
||||
if constexpr (with_edit_distance)
|
||||
{
|
||||
ext_exprs.reserve(str_patterns.size());
|
||||
ext_exprs_ptrs.reserve(str_patterns.size());
|
||||
@ -186,7 +187,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
|
||||
* as it is said in the Hyperscan documentation. https://intel.github.io/hyperscan/dev-reference/performance.html#single-match-flag
|
||||
*/
|
||||
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8);
|
||||
if constexpr (WithEditDistance)
|
||||
if constexpr (with_edit_distance)
|
||||
{
|
||||
/// Hyperscan currently does not support UTF8 matching with edit distance.
|
||||
flags.back() &= ~HS_FLAG_UTF8;
|
||||
@ -211,7 +212,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
|
||||
}
|
||||
|
||||
hs_error_t err;
|
||||
if constexpr (!WithEditDistance)
|
||||
if constexpr (!with_edit_distance)
|
||||
err = hs_compile_multi(
|
||||
patterns.data(),
|
||||
flags.data(),
|
||||
@ -236,7 +237,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
|
||||
if (err != HS_SUCCESS)
|
||||
{
|
||||
/// CompilerError is a unique_ptr, so correct memory free after the exception is thrown.
|
||||
CompilerError error(compile_error);
|
||||
CompilerErrorPtr error(compile_error);
|
||||
|
||||
if (error->expression < 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, String(error->message));
|
||||
@ -253,7 +254,7 @@ inline Regexps constructRegexps(const std::vector<String> & str_patterns, [[mayb
|
||||
|
||||
/// If not HS_SUCCESS, it is guaranteed that the memory would not be allocated for scratch.
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not allocate scratch space for hyperscan");
|
||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not allocate scratch space for vectorscan");
|
||||
|
||||
return {db, scratch};
|
||||
}
|
||||
@ -288,9 +289,9 @@ struct GlobalCacheTable
|
||||
}
|
||||
};
|
||||
|
||||
/// If WithEditDistance is False, edit_distance must be nullopt. Also, we use templates here because each instantiation of function template
|
||||
/// If with_edit_distance is False, edit_distance must be nullopt. Also, we use templates here because each instantiation of function template
|
||||
/// has its own copy of local static variables which must not be the same for different hyperscan compilations.
|
||||
template <bool save_indices, bool WithEditDistance>
|
||||
template <bool save_indices, bool with_edit_distance>
|
||||
inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view> & patterns, std::optional<UInt32> edit_distance)
|
||||
{
|
||||
static GlobalCacheTable pool; /// Different variables for different pattern parameters, thread-safe in C++11
|
||||
@ -320,7 +321,7 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
|
||||
auto deferred_constructed_regexps = std::make_shared<DeferredConstructedRegexps>(
|
||||
[str_patterns, edit_distance]()
|
||||
{
|
||||
return constructRegexps<save_indices, WithEditDistance>(str_patterns, edit_distance);
|
||||
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
|
||||
});
|
||||
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
|
||||
}
|
||||
@ -331,7 +332,7 @@ inline DeferredConstructedRegexpsPtr getOrSet(const std::vector<std::string_view
|
||||
auto deferred_constructed_regexps = std::make_shared<DeferredConstructedRegexps>(
|
||||
[str_patterns, edit_distance]()
|
||||
{
|
||||
return constructRegexps<save_indices, WithEditDistance>(str_patterns, edit_distance);
|
||||
return constructRegexps<save_indices, with_edit_distance>(str_patterns, edit_distance);
|
||||
});
|
||||
bucket = {std::move(str_patterns), edit_distance, deferred_constructed_regexps};
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Common/Volnitsky.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -17,131 +16,130 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
struct ReplaceRegexpTraits
|
||||
{
|
||||
enum class Replace
|
||||
{
|
||||
First,
|
||||
All
|
||||
};
|
||||
};
|
||||
|
||||
/** Replace all matches of regexp 'needle' to string 'replacement'. 'needle' and 'replacement' are constants.
|
||||
* 'replacement' could contain substitutions, for example: '\2-\3-\1'
|
||||
* 'replacement' can contain substitutions, for example: '\2-\3-\1'
|
||||
*/
|
||||
template <bool replace_one = false>
|
||||
template <ReplaceRegexpTraits::Replace replace>
|
||||
struct ReplaceRegexpImpl
|
||||
{
|
||||
/// Sequence of instructions, describing how to get resulting string.
|
||||
struct Instruction
|
||||
{
|
||||
/// If not negative - perform substitution of n-th subpattern from the regexp match.
|
||||
/// If not negative, perform substitution of n-th subpattern from the regexp match.
|
||||
int substitution_num = -1;
|
||||
/// Otherwise - paste this string verbatim.
|
||||
std::string literal;
|
||||
/// Otherwise, paste this literal string verbatim.
|
||||
String literal;
|
||||
|
||||
Instruction(int substitution_num_) : substitution_num(substitution_num_) {} /// NOLINT
|
||||
Instruction(std::string literal_) : literal(std::move(literal_)) {} /// NOLINT
|
||||
explicit Instruction(int substitution_num_) : substitution_num(substitution_num_) {}
|
||||
explicit Instruction(String literal_) : literal(std::move(literal_)) {}
|
||||
};
|
||||
|
||||
/// Decomposes the replacement string into a sequence of substitutions and literals.
|
||||
/// E.g. "abc\1de\2fg\1\2" --> inst("abc"), inst(1), inst("de"), inst(2), inst("fg"), inst(1), inst(2)
|
||||
using Instructions = std::vector<Instruction>;
|
||||
|
||||
static const size_t max_captures = 10;
|
||||
static constexpr int max_captures = 10;
|
||||
|
||||
|
||||
static Instructions createInstructions(const std::string & s, int num_captures)
|
||||
static Instructions createInstructions(std::string_view replacement, int num_captures)
|
||||
{
|
||||
Instructions instructions;
|
||||
|
||||
String now;
|
||||
for (size_t i = 0; i < s.size(); ++i)
|
||||
String literals;
|
||||
for (size_t i = 0; i < replacement.size(); ++i)
|
||||
{
|
||||
if (s[i] == '\\' && i + 1 < s.size())
|
||||
if (replacement[i] == '\\' && i + 1 < replacement.size())
|
||||
{
|
||||
if (isNumericASCII(s[i + 1])) /// Substitution
|
||||
if (isNumericASCII(replacement[i + 1])) /// Substitution
|
||||
{
|
||||
if (!now.empty())
|
||||
if (!literals.empty())
|
||||
{
|
||||
instructions.emplace_back(now);
|
||||
now = "";
|
||||
instructions.emplace_back(literals);
|
||||
literals = "";
|
||||
}
|
||||
instructions.emplace_back(s[i + 1] - '0');
|
||||
instructions.emplace_back(replacement[i + 1] - '0');
|
||||
}
|
||||
else
|
||||
now += s[i + 1]; /// Escaping
|
||||
literals += replacement[i + 1]; /// Escaping
|
||||
++i;
|
||||
}
|
||||
else
|
||||
now += s[i]; /// Plain character
|
||||
literals += replacement[i]; /// Plain character
|
||||
}
|
||||
|
||||
if (!now.empty())
|
||||
{
|
||||
instructions.emplace_back(now);
|
||||
now = "";
|
||||
}
|
||||
if (!literals.empty())
|
||||
instructions.emplace_back(literals);
|
||||
|
||||
for (const auto & it : instructions)
|
||||
if (it.substitution_num >= num_captures)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid replace instruction in replacement string. Id: {}, but regexp has only {} subpatterns",
|
||||
it.substitution_num, num_captures - 1);
|
||||
for (const auto & instr : instructions)
|
||||
if (instr.substitution_num >= num_captures)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups",
|
||||
instr.substitution_num, num_captures - 1);
|
||||
|
||||
return instructions;
|
||||
}
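A worked example of the decomposition performed above (mine, not from the patch): with num_captures = 3, the replacement "\2-\1!" becomes inst(2), inst("-"), inst(1), inst("!"), while a reference such as \7 is rejected because only capturing groups 0..2 exist. A small self-contained re-implementation for illustration:

    #include <cassert>
    #include <string>
    #include <string_view>
    #include <vector>

    struct Instr { int substitution_num = -1; std::string literal; };

    std::vector<Instr> decompose(std::string_view replacement)
    {
        std::vector<Instr> result;
        std::string literals;
        for (size_t i = 0; i < replacement.size(); ++i)
        {
            if (replacement[i] == '\\' && i + 1 < replacement.size())
            {
                if (replacement[i + 1] >= '0' && replacement[i + 1] <= '9')
                {
                    if (!literals.empty())
                    {
                        result.push_back({-1, literals});
                        literals.clear();
                    }
                    result.push_back({replacement[i + 1] - '0', {}});
                }
                else
                    literals += replacement[i + 1];    // escaped character
                ++i;
            }
            else
                literals += replacement[i];            // plain character
        }
        if (!literals.empty())
            result.push_back({-1, literals});
        return result;
    }

    int main()
    {
        const auto instrs = decompose("\\2-\\1!");
        assert(instrs.size() == 4);
        assert(instrs[0].substitution_num == 2);
        assert(instrs[1].literal == "-");
        assert(instrs[2].substitution_num == 1);
        assert(instrs[3].literal == "!");
    }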
|
||||
|
||||
|
||||
static void processString(
|
||||
const re2_st::StringPiece & input,
|
||||
const char * haystack_data,
|
||||
size_t haystack_length,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offset & res_offset,
|
||||
re2_st::RE2 & searcher,
|
||||
const re2_st::RE2 & searcher,
|
||||
int num_captures,
|
||||
const Instructions & instructions)
|
||||
{
|
||||
re2_st::StringPiece haystack(haystack_data, haystack_length);
|
||||
re2_st::StringPiece matches[max_captures];
|
||||
|
||||
size_t copy_pos = 0;
|
||||
size_t match_pos = 0;
|
||||
|
||||
while (match_pos < static_cast<size_t>(input.length()))
|
||||
while (match_pos < haystack_length)
|
||||
{
|
||||
/// If no more replacements possible for current string
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
if (searcher.Match(input, match_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
|
||||
if (searcher.Match(haystack, match_pos, haystack_length, re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
size_t bytes_to_copy = (match.data() - input.data()) - copy_pos;
|
||||
const auto & match = matches[0]; /// Complete match (\0)
|
||||
size_t bytes_to_copy = (match.data() - haystack.data()) - copy_pos;
|
||||
|
||||
/// Copy prefix before matched regexp without modification
|
||||
/// Copy prefix before current match without modification
|
||||
res_data.resize(res_data.size() + bytes_to_copy);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, bytes_to_copy);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, bytes_to_copy);
|
||||
res_offset += bytes_to_copy;
|
||||
copy_pos += bytes_to_copy + match.length();
|
||||
match_pos = copy_pos;
|
||||
|
||||
/// Do substitution instructions
|
||||
for (const auto & it : instructions)
|
||||
/// Substitute inside current match using instructions
|
||||
for (const auto & instr : instructions)
|
||||
{
|
||||
if (it.substitution_num >= 0)
|
||||
{
|
||||
const auto & substitution = matches[it.substitution_num];
|
||||
|
||||
res_data.resize(res_data.size() + substitution.length());
|
||||
memcpy(&res_data[res_offset], substitution.data(), substitution.length());
|
||||
res_offset += substitution.length();
|
||||
}
|
||||
std::string_view replacement;
|
||||
if (instr.substitution_num >= 0)
|
||||
replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size());
|
||||
else
|
||||
{
|
||||
const auto & literal = it.literal;
|
||||
|
||||
res_data.resize(res_data.size() + literal.size());
|
||||
memcpy(&res_data[res_offset], literal.data(), literal.size());
|
||||
res_offset += literal.size();
|
||||
}
|
||||
replacement = instr.literal;
|
||||
res_data.resize(res_data.size() + replacement.size());
|
||||
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
|
||||
res_offset += replacement.size();
|
||||
}
|
||||
|
||||
if (replace_one)
|
||||
if constexpr (replace == ReplaceRegexpTraits::Replace::First)
|
||||
can_finish_current_string = true;
|
||||
|
||||
if (match.length() == 0)
|
||||
if (match.empty())
|
||||
{
|
||||
/// Step one character to avoid infinite loop
|
||||
++match_pos;
|
||||
if (match_pos >= static_cast<size_t>(input.length()))
|
||||
if (match_pos >= haystack_length)
|
||||
can_finish_current_string = true;
|
||||
}
|
||||
}
|
||||
@ -151,10 +149,10 @@ struct ReplaceRegexpImpl
|
||||
/// If ready, append suffix after match to end of string.
|
||||
if (can_finish_current_string)
|
||||
{
|
||||
res_data.resize(res_data.size() + input.length() - copy_pos);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, input.length() - copy_pos);
|
||||
res_offset += input.length() - copy_pos;
|
||||
copy_pos = input.length();
|
||||
res_data.resize(res_data.size() + haystack_length - copy_pos);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, haystack_length - copy_pos);
|
||||
res_offset += haystack_length - copy_pos;
|
||||
copy_pos = haystack_length;
|
||||
match_pos = copy_pos;
|
||||
}
|
||||
}
|
||||
@ -164,12 +162,11 @@ struct ReplaceRegexpImpl
|
||||
++res_offset;
|
||||
}
|
||||
|
||||
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
@ -178,11 +175,19 @@ struct ReplaceRegexpImpl
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
|
||||
typename re2_st::RE2::Options regexp_options;
|
||||
/// Never write error messages to stderr. It's ignorant to do it from library code.
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast<int>(max_captures));
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
@ -190,9 +195,10 @@ struct ReplaceRegexpImpl
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t from = i > 0 ? offsets[i - 1] : 0;
|
||||
re2_st::StringPiece input(reinterpret_cast<const char *>(data.data() + from), offsets[i] - from - 1);
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = static_cast<unsigned>(offsets[i] - from - 1);
|
||||
|
||||
processString(input, res_data, res_offset, searcher, num_captures, instructions);
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
@ -200,8 +206,8 @@ struct ReplaceRegexpImpl
|
||||
static void vectorFixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
@ -210,20 +216,29 @@ struct ReplaceRegexpImpl
|
||||
res_data.reserve(data.size());
|
||||
res_offsets.resize(size);
|
||||
|
||||
typename re2_st::RE2::Options regexp_options;
|
||||
/// Never write error messages to stderr. It's ignorant to do it from library code.
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast<int>(max_captures));
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t from = i * n;
|
||||
re2_st::StringPiece input(reinterpret_cast<const char *>(data.data() + from), n);
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = n;
|
||||
|
||||
processString(input, res_data, res_offset, searcher, num_captures, instructions);
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
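One non-obvious consequence of max_captures above (my note, not from the patch): num_captures is min(NumberOfCapturingGroups() + 1, 10), i.e. group 0 (the whole match) plus at most nine capturing groups are passed to RE2::Match, so \9 is the largest back-reference a replacement can use no matter how many groups the pattern declares.

    // Hypothetical check of the capping arithmetic used above.
    #include <algorithm>

    constexpr int max_captures = 10;
    constexpr int numCaptures(int capturing_groups) { return std::min(capturing_groups + 1, max_captures); }

    static_assert(numCaptures(3) == 4, "\\0..\\3 usable for a pattern with three groups");
    static_assert(numCaptures(12) == 10, "capped: only \\0..\\9 usable however many groups exist");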
|
||||
|
@ -8,9 +8,17 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct ReplaceStringTraits
|
||||
{
|
||||
enum class Replace
|
||||
{
|
||||
First,
|
||||
All
|
||||
};
|
||||
};
|
||||
/** Replace one or all occurrences of substring 'needle' with 'replacement'. 'needle' and 'replacement' are constants.
|
||||
*/
|
||||
template <bool replace_one = false>
|
||||
template <ReplaceStringTraits::Replace replace>
|
||||
struct ReplaceStringImpl
|
||||
{
|
||||
static void vector(
|
||||
@ -66,7 +74,7 @@ struct ReplaceStringImpl
|
||||
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
|
||||
res_offset += replacement.size();
|
||||
pos = match + needle.size();
|
||||
if (replace_one)
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
can_finish_current_string = true;
|
||||
}
|
||||
else
|
||||
@ -155,7 +163,7 @@ struct ReplaceStringImpl
|
||||
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
|
||||
res_offset += replacement.size();
|
||||
pos = match + needle.size();
|
||||
if (replace_one || pos == begin + n * (i + 1))
|
||||
if (replace == ReplaceStringTraits::Replace::First || pos == begin + n * (i + 1))
|
||||
can_finish_current_string = true;
|
||||
}
|
||||
else
|
||||
|
@ -87,7 +87,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_
|
||||
parser,
|
||||
statement_def.data(),
|
||||
statement_def.data() + statement_def.size(),
|
||||
"in file " + filepath + " from backup " + backup->getName(),
|
||||
"in file " + filepath + " from backup " + backup->getNameForLogging(),
|
||||
0,
|
||||
context->getSettingsRef().max_parser_depth);
|
||||
break;
|
||||
|
@ -1,8 +1,7 @@
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
|
||||
#if USE_BASE64
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -15,4 +14,5 @@ REGISTER_FUNCTION(Base64Decode)
|
||||
factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,10 +1,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_BASE64
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -17,4 +14,5 @@ REGISTER_FUNCTION(Base64Encode)
|
||||
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
35
src/Functions/formatReadableDecimalSize.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/formatReadable.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Impl
|
||||
{
|
||||
static constexpr auto name = "formatReadableDecimalSize";
|
||||
|
||||
static void format(double value, DB::WriteBuffer & out)
|
||||
{
|
||||
formatReadableSizeWithDecimalSuffix(value, out);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(FormatReadableDecimalSize)
|
||||
{
|
||||
factory.registerFunction<FunctionFormatReadable<Impl>>(
|
||||
{
|
||||
R"(
|
||||
Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
|
||||
)",
|
||||
Documentation::Examples{
|
||||
{"formatReadableDecimalSize", "SELECT formatReadableDecimalSize(1000)"}},
|
||||
Documentation::Categories{"OtherFunctions"}
|
||||
},
|
||||
FunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
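If this registration lands as written, formatReadableDecimalSize mirrors formatReadableSize but with decimal (powers of 1000) suffixes, so SELECT formatReadableDecimalSize(1000) should render as '1.00 KB'. A hypothetical unit-style check against the helper used above (header paths and the exact output string are my assumptions):

    #include <cassert>
    #include <Common/formatReadable.h>
    #include <IO/WriteBufferFromString.h>

    int main()
    {
        using namespace DB;
        WriteBufferFromOwnString out;
        formatReadableSizeWithDecimalSuffix(1000.0, out);   // same helper as Impl::format above
        assert(out.str() == "1.00 KB");                     // decimal suffix, not the binary KiB scale
    }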
|
@ -13,7 +13,7 @@ struct NameReplaceAll
|
||||
static constexpr auto name = "replaceAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<false>, NameReplaceAll>;
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::All>, NameReplaceAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceOne
|
||||
static constexpr auto name = "replaceOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<true>, NameReplaceOne>;
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::First>, NameReplaceOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpAll
|
||||
static constexpr auto name = "replaceRegexpAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<false>, NameReplaceRegexpAll>;
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::All>, NameReplaceRegexpAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpOne
|
||||
static constexpr auto name = "replaceRegexpOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<true>, NameReplaceRegexpOne>;
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::First>, NameReplaceRegexpOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/FunctionBase64Conversion.h>
|
||||
|
||||
#if USE_BASE64
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,4 +10,5 @@ REGISTER_FUNCTION(TryBase64Decode)
|
||||
factory.registerFunction<FunctionBase64Conversion<TryBase64Decode>>();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -528,16 +528,17 @@ namespace detail
|
||||
|
||||
auto on_retriable_error = [&]()
|
||||
{
|
||||
retry_with_range_header = true;
|
||||
impl.reset();
|
||||
auto http_session = session->getSession();
|
||||
http_session->reset();
|
||||
sleepForMilliseconds(milliseconds_to_wait);
|
||||
retry_with_range_header = true;
|
||||
impl.reset();
|
||||
auto http_session = session->getSession();
|
||||
http_session->reset();
|
||||
sleepForMilliseconds(milliseconds_to_wait);
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < settings.http_max_tries; ++i)
|
||||
{
|
||||
exception = nullptr;
|
||||
initialization_error = InitializeError::NONE;
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -123,7 +123,10 @@ void WriteBufferFromS3::nextImpl()
|
||||
void WriteBufferFromS3::allocateBuffer()
|
||||
{
|
||||
if (total_parts_uploaded != 0 && total_parts_uploaded % s3_settings.upload_part_size_multiply_parts_count_threshold == 0)
|
||||
{
|
||||
upload_part_size *= s3_settings.upload_part_size_multiply_factor;
|
||||
upload_part_size = std::min(upload_part_size, s3_settings.max_upload_part_size);
|
||||
}
|
||||
|
||||
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
|
||||
temporary_buffer->exceptions(std::ios::badbit);
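The change above grows the multipart-upload part size geometrically: after every upload_part_size_multiply_parts_count_threshold uploaded parts, the size is multiplied by upload_part_size_multiply_factor and clamped to max_upload_part_size. A worked trace with made-up settings (threshold 10, factor 2, initial 32 MiB, cap 512 MiB), using the same condition on total_parts_uploaded as above:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint64_t part_size = 32ULL << 20;                        // hypothetical initial part size
        const uint64_t factor = 2, threshold = 10, cap = 512ULL << 20;
        for (uint64_t total_parts_uploaded = 0; total_parts_uploaded < 60; ++total_parts_uploaded)
        {
            if (total_parts_uploaded != 0 && total_parts_uploaded % threshold == 0)
                part_size = std::min(part_size * factor, cap);   // same rule as allocateBuffer()
            if (total_parts_uploaded % threshold == 0)
                std::printf("parts %2llu..%2llu: %llu MiB\n",
                            (unsigned long long) total_parts_uploaded + 1,
                            (unsigned long long) (total_parts_uploaded + threshold),
                            (unsigned long long) (part_size >> 20));
        }
        // Prints 32, 64, 128, 256, 512, 512 MiB for successive batches of ten parts.
    }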
|
||||
|
@ -2637,7 +2637,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
|
||||
ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const
|
||||
{
|
||||
if (data_variants.empty())
|
||||
throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
throw Exception("Empty data passed to Aggregator::prepareVariantsToMerge.", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
|
||||
LOG_TRACE(log, "Merging aggregated data");
|
||||
|
||||
|
@ -37,6 +37,7 @@ namespace ProfileEvents
|
||||
{
|
||||
extern const Event AsyncInsertQuery;
|
||||
extern const Event AsyncInsertBytes;
|
||||
extern const Event FailedAsyncInsertQuery;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -101,6 +102,8 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
finished = true;
|
||||
if (exception_)
|
||||
ProfileEvents::increment(ProfileEvents::FailedAsyncInsertQuery, 1);
|
||||
exception = exception_;
|
||||
cv.notify_all();
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ bool ClusterDiscovery::needUpdate(const Strings & node_uuids, const NodesInfo &
|
||||
|
||||
ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
|
||||
{
|
||||
std::vector<std::vector<String>> shards;
|
||||
std::vector<Strings> shards;
|
||||
{
|
||||
std::map<size_t, Strings> replica_adresses;
|
||||
|
||||
@ -244,7 +244,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
|
||||
/* password= */ "",
|
||||
/* clickhouse_port= */ secure ? context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : context->getTCPPort(),
|
||||
/* treat_local_as_remote= */ false,
|
||||
/* treat_local_port_as_remote= */ context->getApplicationType() == Context::ApplicationType::LOCAL,
|
||||
/* treat_local_port_as_remote= */ false, /// should be set only for clickhouse-local, but cluster discovery is not used there
|
||||
/* secure= */ secure);
|
||||
return cluster;
|
||||
}
|
||||
|
@ -8,12 +8,14 @@
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTQueryWithTableAndOutput.h>
|
||||
#include <Databases/DatabaseReplicated.h>
|
||||
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -168,6 +170,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context)
|
||||
query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth);
|
||||
}
|
||||
|
||||
void DDLTaskBase::formatRewrittenQuery(ContextPtr context)
|
||||
{
|
||||
/// Convert rewritten AST back to string.
|
||||
query_str = queryToString(*query);
|
||||
query_for_logging = maskSensitiveInfoInQueryForLogging(query_str, query, context);
|
||||
}
|
||||
|
||||
ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/)
|
||||
{
|
||||
auto query_context = Context::createCopy(from_context);
|
||||
@ -265,6 +274,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log)
|
||||
host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name);
|
||||
}
|
||||
|
||||
/// Rewrite AST without ON CLUSTER.
|
||||
WithoutOnClusterASTRewriteParams params;
|
||||
params.default_database = address_in_cluster.default_database;
|
||||
params.host_id = address_in_cluster.toString();
|
||||
@ -405,6 +415,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context)
|
||||
chassert(!ddl_query->database);
|
||||
ddl_query->setDatabase(database->getDatabaseName());
|
||||
}
|
||||
formatRewrittenQuery(context);
|
||||
}
|
||||
|
||||
ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper)
|
||||
|
@ -99,6 +99,9 @@ struct DDLTaskBase
|
||||
String host_id_str;
|
||||
ASTPtr query;
|
||||
|
||||
String query_str;
|
||||
String query_for_logging;
|
||||
|
||||
bool is_initial_query = false;
|
||||
bool is_circular_replicated = false;
|
||||
bool execute_on_leader = false;
|
||||
@ -114,6 +117,7 @@ struct DDLTaskBase
|
||||
virtual ~DDLTaskBase() = default;
|
||||
|
||||
virtual void parseQueryFromEntry(ContextPtr context);
|
||||
void formatRewrittenQuery(ContextPtr context);
|
||||
|
||||
virtual String getShardID() const = 0;
|
||||
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include <Parsers/ASTCreateIndexQuery.h>
|
||||
#include <Parsers/ASTDropIndexQuery.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
@ -207,6 +205,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
|
||||
task->parseQueryFromEntry(context);
|
||||
/// Stage 3.2: check cluster and find the host in cluster
|
||||
task->setClusterInfo(context, log);
|
||||
/// Stage 3.3: output rewritten query back to string
|
||||
task->formatRewrittenQuery(context);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -431,11 +431,12 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
|
||||
return *current_tasks.back();
|
||||
}
|
||||
|
||||
bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
{
|
||||
/// Add special comment at the start of query to easily identify DDL-produced queries in query_log
|
||||
String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
|
||||
String query_to_execute = query_prefix + query;
|
||||
String query_to_execute = query_prefix + task.query_str;
|
||||
String query_to_show_in_logs = query_prefix + task.query_for_logging;
|
||||
|
||||
ReadBufferFromString istr(query_to_execute);
|
||||
String dummy_string;
|
||||
@ -463,7 +464,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
|
||||
throw;
|
||||
|
||||
task.execution_status = ExecutionStatus::fromCurrentException();
|
||||
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
|
||||
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
|
||||
|
||||
/// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
|
||||
/// if replica has stopped being leader and we should retry query.
|
||||
@ -484,7 +485,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
|
||||
throw;
|
||||
|
||||
task.execution_status = ExecutionStatus::fromCurrentException();
|
||||
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
|
||||
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
|
||||
|
||||
/// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
|
||||
/// so we consider unknown exception as retryable error.
|
||||
@ -492,7 +493,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
|
||||
}
|
||||
|
||||
task.execution_status = ExecutionStatus(0);
|
||||
LOG_DEBUG(log, "Executed query: {}", query);
|
||||
LOG_DEBUG(log, "Executed query: {}", query_to_show_in_logs);
|
||||
|
||||
return true;
|
||||
}
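The query_prefix comment above means every distributed DDL statement is executed with a marker comment in front, so DDL-produced queries can be found in system.query_log by that marker. For example (entry name invented for illustration), a task named query-0000000042 carrying a CREATE TABLE statement is executed as the string built below:

    // Hypothetical illustration of the prefixed query built in tryExecuteQuery above.
    #include <cassert>
    #include <string>

    int main()
    {
        const std::string entry_name = "query-0000000042";    // invented entry name
        const std::string query_str = "CREATE TABLE t (x UInt8) ENGINE = Memory";
        const std::string query_to_execute = "/* ddl_entry=" + entry_name + " */ " + query_str;
        assert(query_to_execute == "/* ddl_entry=query-0000000042 */ CREATE TABLE t (x UInt8) ENGINE = Memory");
    }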
|
||||
@ -514,7 +515,7 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
|
||||
|
||||
void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
{
|
||||
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
|
||||
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.query_for_logging);
|
||||
chassert(!task.completely_processed);
|
||||
|
||||
/// Setup tracing context on current thread for current DDL
|
||||
@ -587,8 +588,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
|
||||
try
|
||||
{
|
||||
String rewritten_query = queryToString(task.query);
|
||||
LOG_DEBUG(log, "Executing query: {}", rewritten_query);
|
||||
LOG_DEBUG(log, "Executing query: {}", task.query_for_logging);
|
||||
|
||||
StoragePtr storage;
|
||||
if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get()); query_with_table)
|
||||
@ -605,12 +605,12 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
|
||||
|
||||
if (task.execute_on_leader)
|
||||
{
|
||||
tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper, execute_on_leader_lock);
|
||||
tryExecuteQueryOnLeaderReplica(task, storage, task.entry_path, zookeeper, execute_on_leader_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
storage.reset();
|
||||
tryExecuteQuery(rewritten_query, task, zookeeper);
|
||||
tryExecuteQuery(task, zookeeper);
|
||||
}
|
||||
}
|
||||
catch (const Coordination::Exception &)
|
||||
@ -694,7 +694,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora
|
||||
bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
DDLTaskBase & task,
|
||||
StoragePtr storage,
|
||||
const String & rewritten_query,
|
||||
const String & /*node_path*/,
|
||||
const ZooKeeperPtr & zookeeper,
|
||||
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock)
|
||||
@ -793,7 +792,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
|
||||
|
||||
/// If the leader unexpectedly changes, this method will return false
/// and on the next iteration the new leader will take the lock
|
||||
if (tryExecuteQuery(rewritten_query, task, zookeeper))
|
||||
if (tryExecuteQuery(task, zookeeper))
|
||||
{
|
||||
executed_by_us = true;
|
||||
break;
|
||||
|
@ -101,12 +101,11 @@ protected:
|
||||
bool tryExecuteQueryOnLeaderReplica(
|
||||
DDLTaskBase & task,
|
||||
StoragePtr storage,
|
||||
const String & rewritten_query,
|
||||
const String & node_path,
|
||||
const ZooKeeperPtr & zookeeper,
|
||||
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock);
|
||||
|
||||
bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
|
||||
bool tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
|
||||
|
||||
/// Checks and cleanups queue's nodes
|
||||
void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
|
||||
|
@ -658,7 +658,9 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample)
|
||||
/// Save non key columns
|
||||
for (auto & column : sample_block_with_columns_to_add)
|
||||
{
|
||||
if (!saved_block_sample.findByName(column.name))
|
||||
if (auto * col = saved_block_sample.findByName(column.name))
|
||||
*col = column;
|
||||
else
|
||||
saved_block_sample.insert(column);
|
||||
}
|
||||
}
|
||||
|
@ -25,13 +25,47 @@ public:
|
||||
{}
|
||||
|
||||
void visit(T & ast)
|
||||
{
|
||||
if (ostr)
|
||||
visitImpl</* with_dump= */ true>(ast);
|
||||
else
|
||||
visitImpl</* with_dump= */ false>(ast);
|
||||
}
|
||||
|
||||
private:
|
||||
Data & data;
|
||||
size_t visit_depth;
|
||||
WriteBuffer * ostr;
|
||||
|
||||
template <bool with_dump>
|
||||
void visitImpl(T & ast)
|
||||
{
|
||||
checkStackSize();
|
||||
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
|
||||
if constexpr (with_dump)
|
||||
{
|
||||
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
|
||||
visitImplMain</* with_dump= */ true>(ast);
|
||||
}
|
||||
else
|
||||
{
|
||||
visitImplMain</* with_dump= */ false>(ast);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool with_dump>
|
||||
void visitImplMain(T & ast)
|
||||
{
|
||||
if constexpr (!_top_to_bottom)
|
||||
visitChildren(ast);
|
||||
visitChildren<with_dump>(ast);
|
||||
|
||||
doVisit(ast);
|
||||
|
||||
if constexpr (_top_to_bottom)
|
||||
visitChildren<with_dump>(ast);
|
||||
}
|
||||
|
||||
void doVisit(T & ast)
|
||||
{
|
||||
try
|
||||
{
|
||||
Matcher::visit(ast, data);
|
||||
@ -41,16 +75,9 @@ public:
|
||||
e.addMessage("While processing {}", ast->formatForErrorMessage());
|
||||
throw;
|
||||
}
|
||||
|
||||
if constexpr (_top_to_bottom)
|
||||
visitChildren(ast);
|
||||
}
|
||||
|
||||
private:
|
||||
Data & data;
|
||||
size_t visit_depth;
|
||||
WriteBuffer * ostr;
|
||||
|
||||
template <bool with_dump>
|
||||
void visitChildren(T & ast)
|
||||
{
|
||||
for (auto & child : ast->children)
|
||||
@ -62,7 +89,7 @@ private:
|
||||
need_visit_child = Matcher::needChildVisit(ast, child);
|
||||
|
||||
if (need_visit_child)
|
||||
visit(child);
|
||||
visitImpl<with_dump>(child);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -726,7 +726,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
|
||||
else if (create.as_table_function)
|
||||
{
|
||||
/// Table function without columns list.
|
||||
auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext());
|
||||
auto table_function_ast = create.as_table_function->ptr();
|
||||
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
|
||||
properties.columns = table_function->getActualTableStructure(getContext());
|
||||
}
|
||||
else if (create.is_dictionary)
|
||||
@ -967,7 +968,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
|
||||
if (as_create.storage)
|
||||
create.set(create.storage, as_create.storage->ptr());
|
||||
else if (as_create.as_table_function)
|
||||
create.as_table_function = as_create.as_table_function->clone();
|
||||
create.set(create.as_table_function, as_create.as_table_function->ptr());
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
|
||||
|
||||
@ -1343,12 +1344,12 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
/// NOTE: CREATE query may be rewritten by Storage creator or table function
|
||||
if (create.as_table_function)
|
||||
{
|
||||
const auto & factory = TableFunctionFactory::instance();
|
||||
auto table_func = factory.get(create.as_table_function, getContext());
|
||||
auto table_function_ast = create.as_table_function->ptr();
|
||||
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
|
||||
/// In case of CREATE AS table_function() query we should use global context
|
||||
/// in storage creation because there will be no query context on server startup
|
||||
/// and because storage lifetime is bigger than query context lifetime.
|
||||
res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
|
||||
res = table_function->execute(table_function_ast, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
|
||||
res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid});
|
||||
}
|
||||
else
|
||||
|
@ -546,10 +546,13 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
|
||||
{
|
||||
auto alias = aliases.find(ident->name())->second;
|
||||
auto alias_ident = alias->clone();
|
||||
alias_ident->as<ASTIdentifier>()->restoreTable();
|
||||
bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
|
||||
if (!alias_equals_column_name)
|
||||
throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
||||
if (auto * alias_ident_typed = alias_ident->as<ASTIdentifier>())
|
||||
{
|
||||
alias_ident_typed->restoreTable();
|
||||
bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
|
||||
if (!alias_equals_column_name)
|
||||
throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
|
||||
}
|
||||
}
|
||||
String short_name = ident->shortName();
|
||||
String original_long_name;
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/toOneLineQuery.h>
|
||||
#include <Parsers/wipePasswordFromQuery.h>
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Storages/StorageInput.h>
|
||||
@ -56,9 +55,9 @@
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
#include <Interpreters/TransactionLog.h>
|
||||
#include <Interpreters/executeQuery.h>
|
||||
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <IO/CompressionMethod.h>
|
||||
|
||||
#include <Processors/Transforms/LimitsCheckingTransform.h>
|
||||
@ -77,7 +76,6 @@
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event QueryMaskingRulesMatch;
|
||||
extern const Event FailedQuery;
|
||||
extern const Event FailedInsertQuery;
|
||||
extern const Event FailedSelectQuery;
|
||||
@ -109,37 +107,6 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
|
||||
}
|
||||
|
||||
|
||||
/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
|
||||
/// The parameter `parsed query` can be nullptr if the query cannot be parsed.
|
||||
static String prepareQueryForLogging(const String & query, const ASTPtr & parsed_query, ContextPtr context)
|
||||
{
|
||||
String res = query;
|
||||
|
||||
// Wiping a password or hash from CREATE/ALTER USER query because we don't want it to go to logs.
|
||||
if (parsed_query && canContainPassword(*parsed_query))
|
||||
{
|
||||
ASTPtr ast_for_logging = parsed_query->clone();
|
||||
wipePasswordFromQuery(ast_for_logging);
|
||||
res = serializeAST(*ast_for_logging);
|
||||
}
|
||||
|
||||
// Wiping sensitive data before cropping query by log_queries_cut_to_length,
|
||||
// otherwise something like credit card without last digit can go to log.
|
||||
if (auto * masker = SensitiveDataMasker::getInstance())
|
||||
{
|
||||
auto matches = masker->wipeSensitiveData(res);
|
||||
if (matches > 0)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
|
||||
}
|
||||
}
|
||||
|
||||
res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/// Log query into text log (not into system table).
|
||||
static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage)
|
||||
{
|
||||
@ -425,14 +392,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
/// MUST go before any modification (except for prepared statements,
|
||||
/// since it substitute parameters and without them query does not contain
|
||||
/// parameters), to keep query as-is in query_log and server log.
|
||||
query_for_logging = prepareQueryForLogging(query, ast, context);
|
||||
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Anyway log the query.
|
||||
if (query.empty())
|
||||
query.assign(begin, std::min(end - begin, static_cast<ptrdiff_t>(max_query_size)));
|
||||
query_for_logging = prepareQueryForLogging(query, ast, context);
|
||||
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
|
||||
|
||||
logQuery(query_for_logging, context, internal, stage);
|
||||
|
||||
|
623
src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp
Normal file
@ -0,0 +1,623 @@
|
||||
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
|
||||
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Parsers/ASTBackupQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/Access/ASTCreateUserQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event QueryMaskingRulesMatch;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
enum class PasswordWipingMode
|
||||
{
|
||||
Query,
|
||||
BackupName,
|
||||
};
|
||||
|
||||
|
||||
template <bool check_only>
|
||||
class PasswordWipingVisitor
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
bool can_contain_password = false;
|
||||
bool password_was_hidden = false;
|
||||
bool is_create_table_query = false;
|
||||
bool is_create_database_query = false;
|
||||
bool is_create_dictionary_query = false;
|
||||
ContextPtr context;
|
||||
PasswordWipingMode mode = PasswordWipingMode::Query;
|
||||
};
|
||||
|
||||
using Visitor = std::conditional_t<
|
||||
check_only,
|
||||
ConstInDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>,
|
||||
InDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>>;
|
||||
|
||||
static bool needChildVisit(const ASTPtr & /* ast */, const ASTPtr & /* child */, Data & data)
|
||||
{
|
||||
if constexpr (check_only)
|
||||
{
|
||||
return !data.can_contain_password;
|
||||
}
|
||||
else
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
        static void visit(ASTPtr ast, Data & data)
        {
            if (auto * create_user_query = ast->as<ASTCreateUserQuery>())
            {
                visitCreateUserQuery(*create_user_query, data);
            }
            else if (auto * create_query = ast->as<ASTCreateQuery>())
            {
                visitCreateQuery(*create_query, data);
            }
            else if (auto * backup_query = ast->as<ASTBackupQuery>())
            {
                visitBackupQuery(*backup_query, data);
            }
            else if (auto * storage = ast->as<ASTStorage>())
            {
                if (data.is_create_table_query)
                    visitTableEngine(*storage, data);
                else if (data.is_create_database_query)
                    visitDatabaseEngine(*storage, data);
            }
            else if (auto * dictionary = ast->as<ASTDictionary>())
            {
                if (data.is_create_dictionary_query)
                    visitDictionaryDef(*dictionary, data);
            }
            else if (auto * function = ast->as<ASTFunction>())
            {
                if (data.mode == PasswordWipingMode::BackupName)
                    wipePasswordFromBackupEngineArguments(*function, data);
                else
                    visitFunction(*function, data);
            }
        }

    private:
        static void visitCreateUserQuery(ASTCreateUserQuery & query, Data & data)
        {
            if (!query.auth_data)
                return;

            auto auth_type = query.auth_data->getType();
            if (auth_type == AuthenticationType::NO_PASSWORD || auth_type == AuthenticationType::LDAP
                || auth_type == AuthenticationType::KERBEROS || auth_type == AuthenticationType::SSL_CERTIFICATE)
                return; /// No password, nothing to hide.

            if constexpr (check_only)
            {
                data.can_contain_password = true;
                return;
            }

            query.show_password = false;
            data.password_was_hidden = true;
        }
        static void visitCreateQuery(ASTCreateQuery & query, Data & data)
        {
            if (query.is_dictionary)
                data.is_create_dictionary_query = true;
            else if (query.table)
                data.is_create_table_query = true;
            else if (query.database)
                data.is_create_database_query = true;
        }

        static void visitTableEngine(ASTStorage & storage, Data & data)
        {
            if (!storage.engine)
                return;

            const String & engine_name = storage.engine->name;

            if (engine_name == "ExternalDistributed")
            {
                /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
                wipePasswordFromArgument(*storage.engine, data, 5);
            }
            else if (engine_name == "MySQL")
            {
                /// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
                wipePasswordFromArgument(*storage.engine, data, 4);
            }
            else if (engine_name == "PostgreSQL")
            {
                /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
                wipePasswordFromArgument(*storage.engine, data, 4);
            }
            else if (engine_name == "MaterializedPostgreSQL")
            {
                /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
                wipePasswordFromArgument(*storage.engine, data, 4);
            }
            else if (engine_name == "MongoDB")
            {
                /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
                wipePasswordFromArgument(*storage.engine, data, 4);
            }
            else if (engine_name == "S3" || engine_name == "COSN")
            {
                /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
                wipePasswordFromS3TableEngineArguments(*storage.engine, data);
            }
        }
        static void wipePasswordFromS3TableEngineArguments(ASTFunction & engine, Data & data)
        {
            /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
            /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
            /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')

            /// But we should check the number of arguments first because we don't need to do those replacements in case of
            /// S3('url' [, 'format' [, 'compression']])
            size_t num_arguments;
            if (!tryGetNumArguments(engine, &num_arguments) || (num_arguments < 4))
                return;

            wipePasswordFromArgument(engine, data, 2);
        }
        static void visitDatabaseEngine(ASTStorage & storage, Data & data)
        {
            if (!storage.engine)
                return;

            const String & engine_name = storage.engine->name;

            if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL")
            {
                /// MySQL('host:port', 'database', 'user', 'password')
                wipePasswordFromArgument(*storage.engine, data, 3);
            }
            else if (engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL")
            {
                /// PostgreSQL('host:port', 'database', 'user', 'password', ...)
                wipePasswordFromArgument(*storage.engine, data, 3);
            }
        }

        static void visitFunction(ASTFunction & function, Data & data)
        {
            if (function.name == "mysql")
            {
                /// mysql('host:port', 'database', 'table', 'user', 'password', ...)
                wipePasswordFromArgument(function, data, 4);
            }
            else if (function.name == "postgresql")
            {
                /// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
                wipePasswordFromArgument(function, data, 4);
            }
            else if (function.name == "mongodb")
            {
                /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
                wipePasswordFromArgument(function, data, 4);
            }
            else if (function.name == "s3" || function.name == "cosn")
            {
                /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
                wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false);
            }
            else if (function.name == "s3Cluster")
            {
                /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
                wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ true);
            }
            else if (function.name == "remote" || function.name == "remoteSecure")
            {
                /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
                wipePasswordFromRemoteFunctionArguments(function, data);
            }
            else if (
                function.name == "encrypt" || function.name == "decrypt" || function.name == "aes_encrypt_mysql"
                || function.name == "aes_decrypt_mysql" || function.name == "tryDecrypt")
            {
                /// encrypt('mode', 'plaintext', 'key' [, iv, aad])
                wipePasswordFromEncryptionFunctionArguments(function, data);
            }
        }
        static void wipePasswordFromS3FunctionArguments(ASTFunction & function, Data & data, bool is_cluster_function)
        {
            /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
            size_t url_arg_idx = is_cluster_function ? 1 : 0;

            /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
            /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
            /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')

            /// But we should check the number of arguments first because we don't need to do any replacements in case of
            /// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format'])
            size_t num_arguments;
            if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < url_arg_idx + 3))
                return;

            if (num_arguments >= url_arg_idx + 5)
            {
                /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...)
                wipePasswordFromArgument(function, data, url_arg_idx + 2);
            }
            else
            {
                /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
                /// We need to distinguish that from s3('url', 'format', 'structure' [, 'compression_method']).
                /// So we will check whether the argument after 'url' is a format.
                String format;
                if (!tryGetEvaluatedConstStringFromArgument(function, data, url_arg_idx + 1, &format))
                    return;

                if (FormatFactory::instance().getAllFormats().contains(format))
                    return; /// The argument after 'url' is a format: s3('url', 'format', ...)

                /// The argument after 'url' is not a format so we do our replacement:
                /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...)
                wipePasswordFromArgument(function, data, url_arg_idx + 2);
            }
        }
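The disambiguation above leans on the registered format names; a sketch of how two hypothetical calls (all values made up) are treated:

    /// s3('https://bucket/data.csv', 'CSV', 'n Int32')                    -- second argument is a known format, nothing is hidden
    /// s3('https://bucket/data.csv', 'AKIAEXAMPLE', 'secret-key', 'CSV')  -- not a format, so the third argument becomes '[HIDDEN]'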
        static void wipePasswordFromRemoteFunctionArguments(ASTFunction & function, Data & data)
        {
            /// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
            /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
            /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
            /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])

            /// But we should check the number of arguments first because we don't need to do any replacements in case of
            /// remote('addresses_expr', db.table)
            size_t num_arguments;
            if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < 3))
                return;

            auto & arguments = assert_cast<ASTExpressionList &>(*function.arguments).children;
            size_t arg_num = 1;

            /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
            const auto * table_function = arguments[arg_num]->as<ASTFunction>();
            if (table_function && TableFunctionFactory::instance().isTableFunctionName(table_function->name))
            {
                ++arg_num;
            }
            else
            {
                String database;
                if (!tryGetEvaluatedConstDatabaseNameFromArgument(function, data, arg_num, &database))
                    return;
                ++arg_num;

                auto qualified_name = QualifiedTableName::parseFromString(database);
                if (qualified_name.database.empty())
                    ++arg_num; /// skip 'table' argument
            }

            /// Check if username and password are specified
            /// (sharding_key can be of any type, so as long as we keep getting string literals we treat them as username & password).
            String username, password;
            bool username_specified = tryGetStringFromArgument(function, arg_num, &username);
            bool password_specified = username_specified && tryGetStringFromArgument(function, arg_num + 1, &password);

            if (password_specified)
            {
                /// Password is specified so we do our replacement:
                /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
                wipePasswordFromArgument(function, data, arg_num + 1);
            }
        }
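A sketch of how the argument skipping above plays out on hypothetical remote() calls (addresses and credentials made up):

    /// remote('127.0.0.1', default.hits, 'user', 'qwerty', rand())  ->  remote('127.0.0.1', default.hits, 'user', '[HIDDEN]', rand())
    /// remote('127.0.0.1', 'db', 'hits', 'user', 'qwerty')          ->  remote('127.0.0.1', 'db', 'hits', 'user', '[HIDDEN]')
    /// remote('127.0.0.1', default.hits, 'user', rand())            ->  unchanged: no string literal follows 'user', so nothing is treated as a password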
        static void wipePasswordFromEncryptionFunctionArguments(ASTFunction & function, Data & data)
        {
            /// We replace all arguments after 'mode' with '[HIDDEN]':
            /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
            wipePasswordFromArgument(function, data, 1);
            removeArgumentsAfter(function, data, 2);
        }

        static void visitBackupQuery(ASTBackupQuery & query, Data & data)
        {
            if (query.backup_name)
            {
                if (auto * backup_engine = query.backup_name->as<ASTFunction>())
                    wipePasswordFromBackupEngineArguments(*backup_engine, data);
            }

            if (query.base_backup_name)
            {
                if (auto * base_backup_engine = query.base_backup_name->as<ASTFunction>())
                    wipePasswordFromBackupEngineArguments(*base_backup_engine, data);
            }
        }

        static void wipePasswordFromBackupEngineArguments(ASTFunction & engine, Data & data)
        {
            if (engine.name == "S3")
            {
                /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
                wipePasswordFromArgument(engine, data, 2);
            }
        }
        static void wipePasswordFromArgument(ASTFunction & function, Data & data, size_t arg_idx)
        {
            if (!function.arguments)
                return;

            auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return; /// return because we don't want to validate query here

            auto & arguments = expr_list->children;
            if (arg_idx >= arguments.size())
                return;

            if constexpr (check_only)
            {
                data.can_contain_password = true;
                return;
            }

            arguments[arg_idx] = std::make_shared<ASTLiteral>("[HIDDEN]");
            data.password_was_hidden = true;
        }

        static void removeArgumentsAfter(ASTFunction & function, Data & data, size_t new_num_arguments)
        {
            if (!function.arguments)
                return;

            auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return; /// return because we don't want to validate query here

            auto & arguments = expr_list->children;
            if (new_num_arguments >= arguments.size())
                return;

            if constexpr (check_only)
            {
                data.can_contain_password = true;
                return;
            }

            arguments.resize(new_num_arguments);
            data.password_was_hidden = true;
        }

        static bool tryGetNumArguments(const ASTFunction & function, size_t * num_arguments)
        {
            if (!function.arguments)
                return false;

            auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return false; /// return false because we don't want to validate query here

            const auto & arguments = expr_list->children;
            *num_arguments = arguments.size();
            return true;
        }
        static bool tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, String * value)
        {
            if (!function.arguments)
                return false;

            const auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return false; /// return false because we don't want to validate query here

            const auto & arguments = expr_list->children;
            if (arg_idx >= arguments.size())
                return false;

            const auto * literal = arguments[arg_idx]->as<ASTLiteral>();
            if (!literal || literal->value.getType() != Field::Types::String)
                return false;

            *value = literal->value.safeGet<String>();
            return true;
        }

        static bool tryGetEvaluatedConstStringFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
        {
            if (!function.arguments)
                return false;

            const auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return false; /// return false because we don't want to validate query here

            const auto & arguments = expr_list->children;
            if (arg_idx >= arguments.size())
                return false;

            if constexpr (check_only)
            {
                data.can_contain_password = true;
                return false;
            }

            ASTPtr argument = arguments[arg_idx];
            try
            {
                argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, data.context);
            }
            catch (...)
            {
                return false;
            }

            const auto & literal = assert_cast<const ASTLiteral &>(*argument);
            if (literal.value.getType() != Field::Types::String)
                return false;

            *value = literal.value.safeGet<String>();
            return true;
        }

        static bool tryGetEvaluatedConstDatabaseNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
        {
            if (!function.arguments)
                return false;

            const auto * expr_list = function.arguments->as<ASTExpressionList>();
            if (!expr_list)
                return false; /// return false because we don't want to validate query here

            const auto & arguments = expr_list->children;
            if (arg_idx >= arguments.size())
                return false;

            if constexpr (check_only)
            {
                data.can_contain_password = true;
                return false;
            }

            ASTPtr argument = arguments[arg_idx];
            try
            {
                argument = evaluateConstantExpressionForDatabaseName(argument, data.context);
            }
            catch (...)
            {
                return false;
            }

            const auto & literal = assert_cast<const ASTLiteral &>(*argument);
            if (literal.value.getType() != Field::Types::String)
                return false;

            *value = literal.value.safeGet<String>();
            return true;
        }
        static void visitDictionaryDef(ASTDictionary & dictionary, Data & data)
        {
            if (!dictionary.source || !dictionary.source->elements)
                return;

            const auto * expr_list = dictionary.source->elements->as<ASTExpressionList>();
            if (!expr_list)
                return; /// return because we don't want to validate query here

            const auto & elements = expr_list->children;

            /// We replace the password in the dictionary's definition:
            /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password 'qwe123' db 'default' table 'ids')) ->
            /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password '[HIDDEN]' db 'default' table 'ids'))
            for (const auto & element : elements)
            {
                auto * pair = element->as<ASTPair>();
                if (!pair)
                    continue; /// just skip because we don't want to validate query here

                if (pair->first == "password")
                {
                    if constexpr (check_only)
                    {
                        data.can_contain_password = true;
                        return;
                    }
                    pair->set(pair->second, std::make_shared<ASTLiteral>("[HIDDEN]"));
                    data.password_was_hidden = true;
                }
            }
        }
    };
    /// Checks the type of a specified AST and returns true if it can contain a password.
    bool canContainPassword(const IAST & ast, PasswordWipingMode mode)
    {
        using WipingVisitor = PasswordWipingVisitor</*check_only= */ true>;
        WipingVisitor::Data data;
        data.mode = mode;
        WipingVisitor::Visitor visitor{data};
        ASTPtr ast_ptr = std::const_pointer_cast<IAST>(ast.shared_from_this());
        visitor.visit(ast_ptr);
        return data.can_contain_password;
    }

    /// Removes a password or its hash from a query if it's specified there, or replaces it with a placeholder.
    /// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
    bool wipePasswordFromQuery(ASTPtr ast, PasswordWipingMode mode, const ContextPtr & context)
    {
        using WipingVisitor = PasswordWipingVisitor</*check_only= */ false>;
        WipingVisitor::Data data;
        data.context = context;
        data.mode = mode;
        WipingVisitor::Visitor visitor{data};
        visitor.visit(ast);
        return data.password_was_hidden;
    }
    /// Common utility for masking sensitive information.
    String maskSensitiveInfoImpl(const String & query, const ASTPtr & parsed_query, PasswordWipingMode mode, const ContextPtr & context)
    {
        String res = query;

        // Wiping a password or hash from the query because we don't want it to go to logs.
        if (parsed_query && canContainPassword(*parsed_query, mode))
        {
            ASTPtr ast_without_password = parsed_query->clone();
            if (wipePasswordFromQuery(ast_without_password, mode, context))
                res = serializeAST(*ast_without_password);
        }

        // Wiping sensitive data before cropping the query by log_queries_cut_to_length,
        // otherwise something like a credit card number without its last digit could end up in the log.
        if (auto * masker = SensitiveDataMasker::getInstance())
        {
            auto matches = masker->wipeSensitiveData(res);
            if (matches > 0)
            {
                ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
            }
        }

        res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);

        return res;
    }
}


String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context)
{
    return maskSensitiveInfoImpl(query, parsed_query, PasswordWipingMode::Query, context);
}


String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context)
{
    return maskSensitiveInfoImpl(backup_name, ast, PasswordWipingMode::BackupName, context);
}

}
19
src/Interpreters/maskSensitiveInfoInQueryForLogging.h
Normal file
@ -0,0 +1,19 @@
#pragma once

#include <Parsers/IAST_fwd.h>
#include <Interpreters/Context_fwd.h>


namespace DB
{

/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
/// The parameter `parsed_query` is allowed to be nullptr if the query cannot be parsed.
/// Does not validate the AST; works in a best-effort way.
String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context);

/// Makes a version of a backup name without sensitive information (e.g. passwords) for logging.
/// Does not validate the AST; works in a best-effort way.
String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context);

}
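A minimal sketch of the second helper on a hypothetical backup destination (bucket and credentials invented); per wipePasswordFromBackupEngineArguments above, only the third S3 argument is replaced:

    /// BACKUP TABLE t TO S3('https://bucket.s3.amazonaws.com/backup', 'AKIAEXAMPLE', 'secret-key')
    /// is logged with the backup name rendered roughly as S3('https://bucket.s3.amazonaws.com/backup', 'AKIAEXAMPLE', '[HIDDEN]')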
@ -245,7 +245,21 @@ String ASTBackupQuery::getID(char) const

ASTPtr ASTBackupQuery::clone() const
{
    return std::make_shared<ASTBackupQuery>(*this);
    auto res = std::make_shared<ASTBackupQuery>(*this);

    if (backup_name)
        res->backup_name = backup_name->clone();

    if (base_backup_name)
        res->base_backup_name = base_backup_name->clone();

    if (cluster_host_ids)
        res->cluster_host_ids = cluster_host_ids->clone();

    if (settings)
        res->settings = settings->clone();

    return res;
}

@ -210,6 +210,8 @@ ASTPtr ASTCreateQuery::clone() const
        res->set(res->dictionary, dictionary->clone());
    }

    if (as_table_function)
        res->set(res->as_table_function, as_table_function->clone());
    if (comment)
        res->set(res->comment, comment->clone());

@ -83,7 +83,7 @@ public:
    ASTPtr lateness_function;
    String as_database;
    String as_table;
    ASTPtr as_table_function;
    IAST * as_table_function = nullptr;
    ASTSelectWithUnionQuery * select = nullptr;
    IAST * comment = nullptr;

@ -141,7 +141,12 @@ String ASTCreateQuotaQuery::getID(char) const

ASTPtr ASTCreateQuotaQuery::clone() const
{
    return std::make_shared<ASTCreateQuotaQuery>(*this);
    auto res = std::make_shared<ASTCreateQuotaQuery>(*this);

    if (roles)
        res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());

    return res;
}

@ -42,7 +42,12 @@ String ASTCreateRoleQuery::getID(char) const

ASTPtr ASTCreateRoleQuery::clone() const
{
    return std::make_shared<ASTCreateRoleQuery>(*this);
    auto res = std::make_shared<ASTCreateRoleQuery>(*this);

    if (settings)
        res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());

    return res;
}

@ -124,7 +124,25 @@ String ASTCreateRowPolicyQuery::getID(char) const

ASTPtr ASTCreateRowPolicyQuery::clone() const
{
    return std::make_shared<ASTCreateRowPolicyQuery>(*this);
    auto res = std::make_shared<ASTCreateRowPolicyQuery>(*this);

    if (names)
        res->names = std::static_pointer_cast<ASTRowPolicyNames>(names->clone());

    if (roles)
        res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());

    /// `res->filters` is already initialized by the copy constructor of ASTCreateRowPolicyQuery (see the first line of this function).
    /// But the copy constructor just copied the pointers inside `filters` instead of cloning them.
    /// We need a deep copy, not a shallow one, so we have to clone each pointer in `res->filters` manually.
    chassert(res->filters.size() == filters.size());
    for (auto & [_, res_filter] : res->filters)
    {
        if (res_filter)
            res_filter = res_filter->clone();
    }

    return res;
}

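The comment in the hunk above is the crux of this whole group of clone() fixes: a defaulted copy constructor copies ASTPtr members as pointers, so the copy shares child nodes with the original. A standalone sketch of the difference, with simplified stand-in types rather than the real AST classes:

    #include <memory>
    #include <vector>

    struct Node { int value = 0; };
    using NodePtr = std::shared_ptr<Node>;

    int main()
    {
        std::vector<NodePtr> filters{std::make_shared<Node>()};

        auto shallow = filters;                        /// copies the pointers: both vectors share the same Node
        auto deep = filters;
        for (auto & f : deep)
            if (f) f = std::make_shared<Node>(*f);     /// clone each element, like `res_filter = res_filter->clone()` above

        shallow[0]->value = 42;                        /// the mutation is visible through filters[0] as well
        return deep[0]->value;                         /// still 0: the deep copy is independent
    }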
@ -49,7 +49,15 @@ String ASTCreateSettingsProfileQuery::getID(char) const

ASTPtr ASTCreateSettingsProfileQuery::clone() const
{
    return std::make_shared<ASTCreateSettingsProfileQuery>(*this);
    auto res = std::make_shared<ASTCreateSettingsProfileQuery>(*this);

    if (to_roles)
        res->to_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(to_roles->clone());

    if (settings)
        res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());

    return res;
}

@ -275,7 +275,24 @@ String ASTCreateUserQuery::getID(char) const

ASTPtr ASTCreateUserQuery::clone() const
{
    return std::make_shared<ASTCreateUserQuery>(*this);
    auto res = std::make_shared<ASTCreateUserQuery>(*this);

    if (names)
        res->names = std::static_pointer_cast<ASTUserNamesWithHost>(names->clone());

    if (default_roles)
        res->default_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(default_roles->clone());

    if (default_database)
        res->default_database = std::static_pointer_cast<ASTDatabaseOrNone>(default_database->clone());

    if (grantees)
        res->grantees = std::static_pointer_cast<ASTRolesOrUsersSet>(grantees->clone());

    if (settings)
        res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());

    return res;
}

@ -29,7 +29,12 @@ String ASTDropAccessEntityQuery::getID(char) const

ASTPtr ASTDropAccessEntityQuery::clone() const
{
    return std::make_shared<ASTDropAccessEntityQuery>(*this);
    auto res = std::make_shared<ASTDropAccessEntityQuery>(*this);

    if (row_policy_names)
        res->row_policy_names = std::static_pointer_cast<ASTRowPolicyNames>(row_policy_names->clone());

    return res;
}