diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index fba8a975ca6..3d43a960534 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -2994,6 +2994,77 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+##############################################################################################
+###################################### SQLANCER FUZZERS ######################################
+##############################################################################################
+ SQLancerTestRelease:
+ needs: [BuilderDebRelease]
+ runs-on: [self-hosted, fuzzer-unit-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/sqlancer_release
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=SQLancer (release)
+ REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: SQLancer
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 sqlancer_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ SQLancerTestDebug:
+ needs: [BuilderDebDebug]
+ runs-on: [self-hosted, fuzzer-unit-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/sqlancer_debug
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=SQLancer (debug)
+ REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: SQLancer
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 sqlancer_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
FinishCheck:
needs:
- DockerHubPush
@@ -3053,6 +3124,8 @@ jobs:
- UnitTestsUBsan
- UnitTestsReleaseClang
- SharedBuildSmokeTest
+ - SQLancerTestRelease
+ - SQLancerTestDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 23245c16374..09ca64977f0 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -3491,6 +3491,77 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
+##############################################################################################
+###################################### SQLANCER FUZZERS ######################################
+##############################################################################################
+ SQLancerTestRelease:
+ needs: [BuilderDebRelease]
+ runs-on: [self-hosted, fuzzer-unit-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/sqlancer_release
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=SQLancer (release)
+ REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: SQLancer
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 sqlancer_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
+ SQLancerTestDebug:
+ needs: [BuilderDebDebug]
+ runs-on: [self-hosted, fuzzer-unit-tester]
+ steps:
+ - name: Set envs
+ run: |
+ cat >> "$GITHUB_ENV" << 'EOF'
+ TEMP_PATH=${{runner.temp}}/sqlancer_debug
+ REPORTS_PATH=${{runner.temp}}/reports_dir
+ CHECK_NAME=SQLancer (debug)
+ REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
+ EOF
+ - name: Download json reports
+ uses: actions/download-artifact@v2
+ with:
+ path: ${{ env.REPORTS_PATH }}
+ - name: Clear repository
+ run: |
+ sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+ - name: Check out repository code
+ uses: actions/checkout@v2
+ - name: SQLancer
+ run: |
+ sudo rm -fr "$TEMP_PATH"
+ mkdir -p "$TEMP_PATH"
+ cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+ cd "$REPO_COPY/tests/ci"
+ python3 sqlancer_check.py "$CHECK_NAME"
+ - name: Cleanup
+ if: always()
+ run: |
+ docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+ docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+ sudo rm -fr "$TEMP_PATH"
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
@@ -3501,7 +3572,6 @@ jobs:
if: contains(github.event.pull_request.labels.*.name, 'jepsen-test')
needs: [BuilderBinRelease]
uses: ./.github/workflows/jepsen.yml
-
FinishCheck:
needs:
- StyleCheck
@@ -3576,6 +3646,8 @@ jobs:
- SharedBuildSmokeTest
- CompatibilityCheck
- IntegrationTestsFlakyCheck
+ - SQLancerTestRelease
+ - SQLancerTestDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
diff --git a/.gitignore b/.gitignore
index 5b8f2ca452d..09d3f4a4e33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,3 +154,6 @@ website/package-lock.json
/programs/server/metadata
/programs/server/store
+# temporary test files
+tests/queries/0_stateless/test_*
+tests/queries/0_stateless/*.binary
diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile
index 0821d516e23..2ebc61e35a9 100644
--- a/docker/test/sqlancer/Dockerfile
+++ b/docker/test/sqlancer/Dockerfile
@@ -1,5 +1,5 @@
# docker build -t clickhouse/sqlancer-test .
-FROM ubuntu:20.04
+FROM ubuntu:22.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
diff --git a/docker/test/sqlancer/process_sqlancer_result.py b/docker/test/sqlancer/process_sqlancer_result.py
index 37b8f465498..3bed4578565 100755
--- a/docker/test/sqlancer/process_sqlancer_result.py
+++ b/docker/test/sqlancer/process_sqlancer_result.py
@@ -11,13 +11,15 @@ def process_result(result_folder):
summary = []
paths = []
tests = [
- "TLPWhere",
+ "TLPAggregate",
+ "TLPDistinct",
"TLPGroupBy",
"TLPHaving",
+ "TLPWhere",
"TLPWhereGroupBy",
- "TLPDistinct",
- "TLPAggregate",
+ "NoREC",
]
+ failed_tests = []
for test in tests:
err_path = "{}/{}.err".format(result_folder, test)
@@ -33,15 +35,11 @@ def process_result(result_folder):
with open(err_path, "r") as f:
if "AssertionError" in f.read():
summary.append((test, "FAIL"))
+ failed_tests.append(test)
status = "failure"
else:
summary.append((test, "OK"))
- logs_path = "{}/logs.tar.gz".format(result_folder)
- if not os.path.exists(logs_path):
- logging.info("No logs tar on path %s", logs_path)
- else:
- paths.append(logs_path)
stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path)
@@ -53,18 +51,23 @@ def process_result(result_folder):
else:
paths.append(stderr_path)
- description = "SQLancer test run. See report"
+ description = "SQLancer run successfully"
+ if status == "failure":
+ description = f"Failed oracles: {failed_tests}"
return status, description, summary, paths
-def write_results(results_file, status_file, results, status):
+def write_results(
+ results_file, status_file, description_file, results, status, description
+):
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
- out = csv.writer(f, delimiter="\t")
- out.writerow(status)
+ f.write(status + "\n")
+ with open(description_file, "w") as f:
+ f.write(description + "\n")
if __name__ == "__main__":
@@ -72,13 +75,20 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of sqlancer test"
)
- parser.add_argument("--in-results-dir", default="/test_output/")
- parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
- parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
+ parser.add_argument("--in-results-dir", default="/workspace/")
+ parser.add_argument("--out-results-file", default="/workspace/summary.tsv")
+ parser.add_argument("--out-description-file", default="/workspace/description.txt")
+ parser.add_argument("--out-status-file", default="/workspace/status.txt")
args = parser.parse_args()
- state, description, test_results, logs = process_result(args.in_results_dir)
+ status, description, summary, logs = process_result(args.in_results_dir)
logging.info("Result parsed")
- status = (state, description)
- write_results(args.out_results_file, args.out_status_file, test_results, status)
+ write_results(
+ args.out_results_file,
+ args.out_status_file,
+ args.out_description_file,
+ summary,
+ status,
+ description,
+ )
logging.info("Result written")
diff --git a/docker/test/sqlancer/run.sh b/docker/test/sqlancer/run.sh
index a1891569d34..4a0f0f6a512 100755
--- a/docker/test/sqlancer/run.sh
+++ b/docker/test/sqlancer/run.sh
@@ -1,33 +1,62 @@
#!/bin/bash
+set -exu
+trap "exit" INT TERM
-set -e -x
+# Fetch the URL given as "$1" with wget: up to four attempts, 0.5 s pause after each failure.
+function wget_with_retry
+{
+    for _ in 1 2 3 4; do
+        if wget -nv -nd -c "$1"; then
+            return 0
+        fi
+        sleep 0.5
+    done
+    return 1
+}
-dpkg -i package_folder/clickhouse-common-static_*.deb
-dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
-dpkg -i package_folder/clickhouse-server_*.deb
-dpkg -i package_folder/clickhouse-client_*.deb
+if [ -z ${BINARY_URL_TO_DOWNLOAD+x} ]
+then
+ echo "No BINARY_URL_TO_DOWNLOAD provided."
+else
+ wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
+ chmod +x /clickhouse
+fi
-service clickhouse-server start && sleep 5
+if [[ -f "/clickhouse" ]]; then
+ echo "/clickhouse exists"
+else
+ exit 1
+fi
+
+cd /workspace
+/clickhouse server -P /workspace/clickhouse-server.pid -L /workspace/clickhouse-server.log -E /workspace/clickhouse-server.log.err --daemon
+
+for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done
cd /sqlancer/sqlancer-master
-export TIMEOUT=300
-export NUM_QUERIES=1000
+TIMEOUT=300
+NUM_QUERIES=1000
+NUM_THREADS=10
+TESTS=( "TLPGroupBy" "TLPHaving" "TLPWhere" "TLPDistinct" "TLPAggregate" "NoREC" )
+echo "${TESTS[@]}"
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err
-( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err
+for TEST in "${TESTS[@]}"; do
+ echo "$TEST"
+ if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]
+ then
+ echo "Server is OK"
+ ( java -jar target/sqlancer-*.jar --log-each-select true --print-failed false --num-threads "$NUM_THREADS" --timeout-seconds "$TIMEOUT" --num-queries "$NUM_QUERIES" --username default --password "" clickhouse --oracle "$TEST" | tee "/workspace/$TEST.out" ) 3>&1 1>&2 2>&3 | tee "/workspace/$TEST.err"
+ else
+ touch "/workspace/$TEST.err" "/workspace/$TEST.out"
+ echo "Server is not responding" | tee /workspace/server_crashed.log
+ fi
+done
-service clickhouse stop
+ls /workspace
+pkill -F /workspace/clickhouse-server.pid || true
-ls /var/log/clickhouse-server/
-tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ .
-tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log
-tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log
-tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log
+for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done
-/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
-ls /test_output
+/process_sqlancer_result.py || { echo "failure" > /workspace/status.txt; echo "Cannot parse results" > /workspace/description.txt; }  # fallback writes the same status/description files the parser writes by default
+ls /workspace
diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md
index 3d993c3e224..67ee8cdb7e2 100644
--- a/docs/en/getting-started/example-datasets/cell-towers.md
+++ b/docs/en/getting-started/example-datasets/cell-towers.md
@@ -4,25 +4,39 @@ sidebar_label: Cell Towers
sidebar_position: 3
title: "Cell Towers"
---
+import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
+import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md';
-This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
+## Goal
+
+In this guide you will learn how to:
+- Load the OpenCelliD data in ClickHouse
+- Connect Apache Superset to ClickHouse
+- Build a dashboard based on data available in the dataset
+
+Here is a preview of the dashboard created in this guide:
+
+![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)
+
+## Get the Dataset {#get-the-dataset}
+
+This dataset is from [OpenCelliD](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).
OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign in.
-
-## Get the Dataset {#get-the-dataset}
-
+### Load the sample data
+
ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
@@ -30,13 +44,33 @@ Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data
![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
-Examine the schema of the cell_towers table:
+### Examine the schema of the cell_towers table
```sql
DESCRIBE TABLE cell_towers
```
+This is the output of `DESCRIBE`. Further down in this guide the field type choices will be described.
+```response
+┌─name──────────┬─type──────────────────────────────────────────────────────────────────┬
+│ radio │ Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5) │
+│ mcc │ UInt16 │
+│ net │ UInt16 │
+│ area │ UInt16 │
+│ cell │ UInt64 │
+│ unit │ Int16 │
+│ lon │ Float64 │
+│ lat │ Float64 │
+│ range │ UInt32 │
+│ samples │ UInt32 │
+│ changeable │ UInt8 │
+│ created │ DateTime │
+│ updated │ DateTime │
+│ averageSignal │ UInt8 │
+└───────────────┴───────────────────────────────────────────────────────────────────────┴
+```
+
@@ -86,7 +120,7 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t
-## Example queries {#examples}
+## Run some example queries {#examples}
1. A number of cell towers by type:
@@ -127,13 +161,13 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.)
```
-So, the top countries are: the USA, Germany, and Russia.
+Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia.
You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
## Use case: Incorporate geo data {#use-case}
-Using `pointInPolygon` function.
+Using the [`pointInPolygon`](/docs/en/sql-reference/functions/geo/coordinates.md/#pointinpolygon) function.
1. Create a table where we will store polygons:
@@ -224,6 +258,110 @@ WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow))
1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.)
```
-The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=).
+## Review of the schema
-Although you cannot create temporary tables there.
+Before building visualizations in Superset, have a look at the columns that you will use. This dataset primarily provides the location (Longitude and Latitude) and radio types at mobile cellular towers worldwide. The column descriptions can be found in the [community forum](https://community.opencellid.org/t/documenting-the-columns-in-the-downloadable-cells-database-csv/186). The columns used in the visualizations that will be built are described below.
+
+Here is a description of the columns taken from the OpenCelliD forum:
+
+| Column | Description |
+|--------------|--------------------------------------------------------|
+| radio | Technology generation: CDMA, GSM, UMTS, 5G NR |
+| mcc | Mobile Country Code: `204` is The Netherlands |
+| lon | Longitude: With Latitude, approximate tower location |
+| lat | Latitude: With Longitude, approximate tower location |
+
+:::tip mcc
+To find your MCC check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code), and use the three digits in the **Mobile country code** column.
+:::
+
+The schema for this table was designed for compact storage on disk and query speed.
+- The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string.
+- `mcc`, or Mobile country code, is stored as a `UInt16` as we know the range is 1 - 999.
+- `lon` and `lat` are `Float64`.
+
+None of the other fields are used in the queries or visualizations in this guide, but they are described in the forum linked above if you are interested.
+
+## Build visualizations with Apache Superset
+
+Superset is easy to run from Docker. If you already have Superset running, all you need to do is add ClickHouse Connect with `pip install clickhouse-connect`. If you need to install Superset, open the **Launch Apache Superset in Docker** section directly below.
+
+<SupersetDocker />
+
+To build a Superset dashboard using the OpenCelliD dataset you should:
+- Add your ClickHouse service as a Superset **database**
+- Add the table **cell_towers** as a Superset **dataset**
+- Create some **charts**
+- Add the charts to a **dashboard**
+
+### Add your ClickHouse service as a Superset database
+
+<ConnectionDetails />
+
+ In Superset a database can be added by choosing the database type, and then providing the connection details. Open Superset and look for the **+**, it has a menu with **Data** and then **Connect database** options.
+
+ ![Add a database](@site/docs/en/getting-started/example-datasets/images/superset-add.png)
+
+ Choose **ClickHouse Connect** from the list:
+
+ ![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)
+
+:::note
+ If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
+:::
+
+#### Add your connection details:
+
+:::tip
+ Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
+:::
+
+ ![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
+
+### Add the table **cell_towers** as a Superset **dataset**
+
+ In Superset a **dataset** maps to a table within a database. Click on add a dataset and choose your ClickHouse service, the database containing your table (`default`), and choose the `cell_towers` table:
+
+![Add cell_towers table as a dataset](@site/docs/en/getting-started/example-datasets/images/superset-add-dataset.png)
+
+### Create some **charts**
+
+When you choose to add a chart in Superset you have to specify the dataset (`cell_towers`) and the chart type. Since the OpenCelliD dataset provides longitude and latitude coordinates for cell towers we will create a **Map** chart. The **deck.gl Scatterplot** type is suited to this dataset as it works well with dense data points on a map.
+
+![Create a map in Superset](@site/docs/en/getting-started/example-datasets/images/superset-create-map.png)
+
+#### Specify the query used for the map
+
+A deck.gl Scatterplot requires a longitude and latitude, and one or more filters can also be applied to the query. In this example two filters are applied, one for cell towers with UMTS radios, and one for the Mobile country code assigned to The Netherlands.
+
+The fields `lon` and `lat` contain the longitude and latitude:
+
+![Specify longitude and latitude fields](@site/docs/en/getting-started/example-datasets/images/superset-lon-lat.png)
+
+Add a filter with `mcc` = `204` (or substitute any other `mcc` value):
+
+![Filter on MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-mcc-204.png)
+
+Add a filter with `radio` = `'UMTS'` (or substitute any other `radio` value, you can see the choices in the output of `DESCRIBE TABLE cell_towers`):
+
+![Filter on radio = UMTS](@site/docs/en/getting-started/example-datasets/images/superset-radio-umts.png)
+
+This is the full configuration for the chart that filters on `radio = 'UMTS'` and `mcc = 204`:
+
+![Chart for UMTS radios in MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png)
+
+Click on **UPDATE CHART** to render the visualization.
+
+### Add the charts to a **dashboard**
+
+This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way and they are added to a dashboard.
+
+ ![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)
+
+:::tip
+The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play).
+
+This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
+
+Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
+:::
diff --git a/docs/en/getting-started/example-datasets/images/superset-add-dataset.png b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png
new file mode 100644
index 00000000000..aaa976d76ce
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-add.png b/docs/en/getting-started/example-datasets/images/superset-add.png
new file mode 100644
index 00000000000..54bbf11a014
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png
new file mode 100644
index 00000000000..8197ea223c2
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png
new file mode 100644
index 00000000000..40c71e0a053
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png
new file mode 100644
index 00000000000..f67d0663063
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-create-map.png b/docs/en/getting-started/example-datasets/images/superset-create-map.png
new file mode 100644
index 00000000000..5ad4395eb13
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-create-map.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-lon-lat.png b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png
new file mode 100644
index 00000000000..f07fb899e72
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-mcc-204.png b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png
new file mode 100644
index 00000000000..a561c539b58
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-radio-umts.png b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png
new file mode 100644
index 00000000000..b0b31b6dbc0
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png
new file mode 100644
index 00000000000..5cb887cb5c1
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png differ
diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md
index cc059f6bd26..6a003571f6e 100644
--- a/docs/en/getting-started/example-datasets/recipes.md
+++ b/docs/en/getting-started/example-datasets/recipes.md
@@ -4,7 +4,7 @@ sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---
-RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
+The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
## Download and Unpack the Dataset
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index dfcef4ae200..7f7c14817ba 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -2939,7 +2939,7 @@ Possible values:
- 0 — Projection optimization disabled.
- 1 — Projection optimization enabled.
-Default value: `0`.
+Default value: `1`.
## force_optimize_projection {#force-optimize-projection}
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md
index 6a1ca3176ad..ad92e773ea3 100644
--- a/docs/en/operations/troubleshooting.md
+++ b/docs/en/operations/troubleshooting.md
@@ -28,18 +28,34 @@ sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D7
sudo apt-get update
```
-### You Get the Unsupported Architecture Warning with Apt-get {#you-get-the-unsupported-architecture-warning-with-apt-get}
+### You Get Different Warnings with `apt-get update` {#you-get-different-warnings-with-apt-get-update}
-- The completed warning message is as follows:
+- The complete warning message is one of the following:
```
N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386'
```
+```
+E: Failed to fetch https://packages.clickhouse.com/deb/dists/stable/main/binary-amd64/Packages.gz File has unexpected size (30451 != 28154). Mirror sync in progress?
+```
+
+```
+E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Origin' value from 'Artifactory' to 'ClickHouse'
+E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Label' value from 'Artifactory' to 'ClickHouse'
+N: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Suite' value from 'stable' to ''
+N: This must be accepted explicitly before updates for this repository can be applied. See apt-secure(8) manpage for details.
+```
+
+```
+Err:11 https://packages.clickhouse.com/deb stable InRelease
+ 400 Bad Request [IP: 172.66.40.249 443]
+```
+
To resolve the above issue, please use the following script:
```bash
-sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch
+sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch /var/lib/apt/lists/partial/packages.clickhouse.com_*
sudo apt-get clean
sudo apt-get autoclean
```
diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md
index 64e23094105..8d659236d4c 100644
--- a/docs/en/sql-reference/functions/geo/index.md
+++ b/docs/en/sql-reference/functions/geo/index.md
@@ -8,70 +8,69 @@ title: "Geo Functions"
## Geographical Coordinates Functions
-- [greatCircleDistance](./coordinates.md#greatCircleDistance)
-- [geoDistance](./coordinates.md#geoDistance)
-- [greatCircleAngle](./coordinates.md#greatCircleAngle)
-- [pointInEllipses](./coordinates.md#pointInEllipses)
-- [pointInPolygon](./coordinates.md#pointInPolygon)
+- [greatCircleDistance](./coordinates.md#greatcircledistance)
+- [geoDistance](./coordinates.md#geodistance)
+- [greatCircleAngle](./coordinates.md#greatcircleangle)
+- [pointInEllipses](./coordinates.md#pointinellipses)
+- [pointInPolygon](./coordinates.md#pointinpolygon)
## Geohash Functions
-- [geohashEncode](./geohash.md#geohashEncode)
-- [geohashDecode](./geohash.md#geohashDecode)
-- [geohashesInBox](./geohash.md#geohashesInBox)
+- [geohashEncode](./geohash.md#geohashencode)
+- [geohashDecode](./geohash.md#geohashdecode)
+- [geohashesInBox](./geohash.md#geohashesinbox)
## H3 Indexes Functions
-- [h3IsValid](./h3.md#h3IsValid)
-- [h3GetResolution](./h3.md#h3GetResolution)
-- [h3EdgeAngle](./h3.md#h3EdgeAngle)
-- [h3EdgeLengthM](./h3.md#h3EdgeLengthM)
-- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm)
-- [geoToH3](./h3.md#geoToH3)
-- [h3ToGeo](./h3.md#h3ToGeo)
-- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary)
-- [h3kRing](./h3.md#h3kRing)
-- [h3GetBaseCell](./h3.md#h3GetBaseCell)
-- [h3HexAreaM2](./h3.md#h3HexAreaM2)
-- [h3HexAreaKm2](./h3.md#h3HexAreaKm2)
-- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors)
-- [h3ToChildren](./h3.md#h3ToChildren)
-- [h3ToParent](./h3.md#h3ToParent)
-- [h3ToString](./h3.md#h3ToString)
-- [stringToH3](./h3.md#stringToH3)
-- [h3GetResolution](./h3.md#h3GetResolution)
-- [h3IsResClassIII](./h3.md#h3IsResClassIII)
-- [h3IsPentagon](./h3.md#h3IsPentagon)
-- [h3GetFaces](./h3.md#h3GetFaces)
-- [h3CellAreaM2](./h3.md#h3CellAreaM2)
-- [h3CellAreaRads2](./h3.md#h3CellAreaRads2)
-- [h3ToCenterChild](./h3.md#h3ToCenterChild)
-- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM)
-- [h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm)
-- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads)
-- [h3NumHexagons](./h3.md#h3NumHexagons)
-- [h3Line](./h3.md#h3Line)
-- [h3Distance](./h3.md#h3Distance)
-- [h3HexRing](./h3.md#h3HexRing)
-- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge)
-- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid)
-- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge)
-- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge)
-- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge)
-- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon)
-- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary)
+- [h3IsValid](./h3.md#h3isvalid)
+- [h3GetResolution](./h3.md#h3getresolution)
+- [h3EdgeAngle](./h3.md#h3edgeangle)
+- [h3EdgeLengthM](./h3.md#h3edgelengthm)
+- [h3EdgeLengthKm](./h3.md#h3edgelengthkm)
+- [geoToH3](./h3.md#geotoh3)
+- [h3ToGeo](./h3.md#h3togeo)
+- [h3ToGeoBoundary](./h3.md#h3togeoboundary)
+- [h3kRing](./h3.md#h3kring)
+- [h3GetBaseCell](./h3.md#h3getbasecell)
+- [h3HexAreaM2](./h3.md#h3hexaream2)
+- [h3HexAreaKm2](./h3.md#h3hexareakm2)
+- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors)
+- [h3ToChildren](./h3.md#h3tochildren)
+- [h3ToParent](./h3.md#h3toparent)
+- [h3ToString](./h3.md#h3tostring)
+- [stringToH3](./h3.md#stringtoh3)
+- [h3GetResolution](./h3.md#h3getresolution)
+- [h3IsResClassIII](./h3.md#h3isresclassiii)
+- [h3IsPentagon](./h3.md#h3ispentagon)
+- [h3GetFaces](./h3.md#h3getfaces)
+- [h3CellAreaM2](./h3.md#h3cellaream2)
+- [h3CellAreaRads2](./h3.md#h3cellarearads2)
+- [h3ToCenterChild](./h3.md#h3tocenterchild)
+- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm)
+- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm)
+- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads)
+- [h3NumHexagons](./h3.md#h3numhexagons)
+- [h3Line](./h3.md#h3line)
+- [h3Distance](./h3.md#h3distance)
+- [h3HexRing](./h3.md#h3hexring)
+- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge)
+- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid)
+- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge)
+- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge)
+- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge)
+- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon)
+- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary)
## S2 Index Functions
-- [geoToS2](./s2.md#geoToS2)
-- [s2ToGeo](./s2.md#s2ToGeo)
-- [s2GetNeighbors](./s2.md#s2GetNeighbors)
-- [s2CellsIntersect](./s2.md#s2CellsIntersect)
-- [s2CapContains](./s2.md#s2CapContains)
-- [s2CapUnion](./s2.md#s2CapUnion)
-- [s2RectAdd](./s2.md#s2RectAdd)
-- [s2RectContains](./s2.md#s2RectContains)
-- [s2RectUinion](./s2.md#s2RectUinion)
-- [s2RectIntersection](./s2.md#s2RectIntersection)
+- [geoToS2](./s2.md#geotos2)
+- [s2ToGeo](./s2.md#s2togeo)
+- [s2GetNeighbors](./s2.md#s2getneighbors)
+- [s2CellsIntersect](./s2.md#s2cellsintersect)
+- [s2CapContains](./s2.md#s2capcontains)
+- [s2CapUnion](./s2.md#s2capunion)
+- [s2RectAdd](./s2.md#s2rectadd)
+- [s2RectContains](./s2.md#s2rectcontains)
+- [s2RectUnion](./s2.md#s2rectunion)
+- [s2RectIntersection](./s2.md#s2rectintersection)
-[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/)
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 6490d4c2272..b9ec21bb59d 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -593,6 +593,27 @@ LIMIT 10
└────────────────┴─────────┘
```
+## formatReadableDecimalSize(x)
+
+Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
+
+Example:
+
+``` sql
+SELECT
+ arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
+ formatReadableDecimalSize(filesize_bytes) AS filesize
+```
+
+``` text
+┌─filesize_bytes─┬─filesize───┐
+│ 1 │ 1.00 B │
+│ 1024 │ 1.02 KB │
+│ 1048576 │ 1.05 MB │
+│ 192851925 │ 192.85 MB │
+└────────────────┴────────────┘
+```
+
## formatReadableSize(x)
Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string.
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index a8ba4843279..e0418a81f14 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -571,13 +571,13 @@ Similar to base58Decode, but returns an empty string in case of error.
## base64Encode(s)
-Encodes ‘s’ string into base64
+Encodes the FixedString or String ‘s’ into base64.
Alias: `TO_BASE64`.
## base64Decode(s)
-Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception.
+Decodes the base64-encoded FixedString or String ‘s’ into the original string. In case of failure raises an exception.
Alias: `FROM_BASE64`.
diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index adf2a07b732..d1f0e44f6b4 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -6,28 +6,29 @@ sidebar_label: For Replacing in Strings
# Functions for Searching and Replacing in Strings
-:::note
+:::note
Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately.
:::
## replaceOne(haystack, pattern, replacement)
-Replaces the first occurrence, if it exists, of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
-Hereafter, ‘pattern’ and ‘replacement’ must be constants.
+Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
+‘pattern’ and ‘replacement’ must be constants.
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
-Replaces all occurrences of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
+Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
## replaceRegexpOne(haystack, pattern, replacement)
-Replacement using the ‘pattern’ regular expression. A re2 regular expression.
-Replaces only the first occurrence, if it exists.
-A pattern can be specified as ‘replacement’. This pattern can include substitutions `\0-\9`.
-The substitution `\0` includes the entire regular expression. Substitutions `\1-\9` correspond to the subpattern numbers.To use the `\` character in a template, escape it using `\`.
-Also keep in mind that a string literal requires an extra escape.
+Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
+‘pattern’ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
+‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
+Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
+To use a verbatim `\` character in the ‘pattern’ or ‘replacement’ string, escape it using `\`.
+Also keep in mind that string literals require extra escaping.
-Example 1. Converting the date to American format:
+Example 1. Converting ISO dates to American format:
``` sql
SELECT DISTINCT
@@ -62,7 +63,7 @@ SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0')
## replaceRegexpAll(haystack, pattern, replacement)
-This does the same thing, but replaces all the occurrences. Example:
+Like ‘replaceRegexpOne’, but replaces all occurrences of the pattern. Example:
``` sql
SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res
diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml
index 459a09ee0b8..d4fd9300208 100644
--- a/packages/clickhouse-client.yaml
+++ b/packages/clickhouse-client.yaml
@@ -37,7 +37,7 @@ deb:
contents:
- src: root/etc/clickhouse-client/config.xml
dst: /etc/clickhouse-client/config.xml
- type: config
+ type: config|noreplace
- src: root/usr/bin/clickhouse-benchmark
dst: /usr/bin/clickhouse-benchmark
- src: root/usr/bin/clickhouse-compressor
diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml
index 8f319c97b65..f2095dda02a 100644
--- a/packages/clickhouse-keeper.yaml
+++ b/packages/clickhouse-keeper.yaml
@@ -29,7 +29,7 @@ deb:
contents:
- src: root/etc/clickhouse-keeper/keeper_config.xml
dst: /etc/clickhouse-keeper/keeper_config.xml
- type: config
+ type: config|noreplace
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
# docs
diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml
index b0778e6bf72..fe59828ca43 100644
--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@@ -44,10 +44,10 @@ deb:
contents:
- src: root/etc/clickhouse-server/config.xml
dst: /etc/clickhouse-server/config.xml
- type: config
+ type: config|noreplace
- src: root/etc/clickhouse-server/users.xml
dst: /etc/clickhouse-server/users.xml
- type: config
+ type: config|noreplace
- src: clickhouse-server.init
dst: /etc/init.d/clickhouse-server
- src: clickhouse-server.service
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 58569a32619..93136df2a5b 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1088,7 +1088,8 @@ void Client::processConfig()
}
else
{
- need_render_progress = config().getBool("progress", false);
+ std::string progress = config().getString("progress", "tty");
+ need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().getBool("echo", false);
ignore_error = config().getBool("ignore-error", false);
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index e5fd4d6bf8d..3ac9c1e7c37 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -489,7 +489,8 @@ void LocalServer::processConfig()
}
else
{
- need_render_progress = config().getBool("progress", false);
+ std::string progress = config().getString("progress", "tty");
+ need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
is_multiquery = true;
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 7f3a749b629..deebb434120 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1336,17 +1336,13 @@
name - name for the rule (optional)
regexp - RE2 compatible regular expression (mandatory)
replace - substitution string for sensitive data (optional, by default - six asterisks)
- -->
hide encrypt/decrypt arguments
((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)
-
\1(???)
-
+ -->
inst("abc"), inst(1), inst("de"), inst(2), inst("fg"), inst(1), inst(2)
using Instructions = std::vector;
- static const size_t max_captures = 10;
+ static constexpr int max_captures = 10;
-
- static Instructions createInstructions(const std::string & s, int num_captures)
+ static Instructions createInstructions(std::string_view replacement, int num_captures)
{
Instructions instructions;
- String now;
- for (size_t i = 0; i < s.size(); ++i)
+ String literals;
+ for (size_t i = 0; i < replacement.size(); ++i)
{
- if (s[i] == '\\' && i + 1 < s.size())
+ if (replacement[i] == '\\' && i + 1 < replacement.size())
{
- if (isNumericASCII(s[i + 1])) /// Substitution
+ if (isNumericASCII(replacement[i + 1])) /// Substitution
{
- if (!now.empty())
+ if (!literals.empty())
{
- instructions.emplace_back(now);
- now = "";
+ instructions.emplace_back(literals);
+ literals = "";
}
- instructions.emplace_back(s[i + 1] - '0');
+ instructions.emplace_back(replacement[i + 1] - '0');
}
else
- now += s[i + 1]; /// Escaping
+ literals += replacement[i + 1]; /// Escaping
++i;
}
else
- now += s[i]; /// Plain character
+ literals += replacement[i]; /// Plain character
}
- if (!now.empty())
- {
- instructions.emplace_back(now);
- now = "";
- }
+ if (!literals.empty())
+ instructions.emplace_back(literals);
- for (const auto & it : instructions)
- if (it.substitution_num >= num_captures)
- throw Exception(ErrorCodes::BAD_ARGUMENTS,
- "Invalid replace instruction in replacement string. Id: {}, but regexp has only {} subpatterns",
- it.substitution_num, num_captures - 1);
+ for (const auto & instr : instructions)
+ if (instr.substitution_num >= num_captures)
+ throw Exception(
+ ErrorCodes::BAD_ARGUMENTS,
+ "Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups",
+ instr.substitution_num, num_captures - 1);
return instructions;
}
-
static void processString(
- const re2_st::StringPiece & input,
+ const char * haystack_data,
+ size_t haystack_length,
ColumnString::Chars & res_data,
ColumnString::Offset & res_offset,
- re2_st::RE2 & searcher,
+ const re2_st::RE2 & searcher,
int num_captures,
const Instructions & instructions)
{
+ re2_st::StringPiece haystack(haystack_data, haystack_length);
re2_st::StringPiece matches[max_captures];
size_t copy_pos = 0;
size_t match_pos = 0;
- while (match_pos < static_cast(input.length()))
+ while (match_pos < haystack_length)
{
/// If no more replacements possible for current string
bool can_finish_current_string = false;
- if (searcher.Match(input, match_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
+ if (searcher.Match(haystack, match_pos, haystack_length, re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
{
- const auto & match = matches[0];
- size_t bytes_to_copy = (match.data() - input.data()) - copy_pos;
+ const auto & match = matches[0]; /// Complete match (\0)
+ size_t bytes_to_copy = (match.data() - haystack.data()) - copy_pos;
- /// Copy prefix before matched regexp without modification
+ /// Copy prefix before current match without modification
res_data.resize(res_data.size() + bytes_to_copy);
- memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, bytes_to_copy);
+ memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, bytes_to_copy);
res_offset += bytes_to_copy;
copy_pos += bytes_to_copy + match.length();
match_pos = copy_pos;
- /// Do substitution instructions
- for (const auto & it : instructions)
+ /// Substitute inside current match using instructions
+ for (const auto & instr : instructions)
{
- if (it.substitution_num >= 0)
- {
- const auto & substitution = matches[it.substitution_num];
-
- res_data.resize(res_data.size() + substitution.length());
- memcpy(&res_data[res_offset], substitution.data(), substitution.length());
- res_offset += substitution.length();
- }
+ std::string_view replacement;
+ if (instr.substitution_num >= 0)
+ replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size());
else
- {
- const auto & literal = it.literal;
-
- res_data.resize(res_data.size() + literal.size());
- memcpy(&res_data[res_offset], literal.data(), literal.size());
- res_offset += literal.size();
- }
+ replacement = instr.literal;
+ res_data.resize(res_data.size() + replacement.size());
+ memcpy(&res_data[res_offset], replacement.data(), replacement.size());
+ res_offset += replacement.size();
}
- if (replace_one)
+ if constexpr (replace == ReplaceRegexpTraits::Replace::First)
can_finish_current_string = true;
- if (match.length() == 0)
+ if (match.empty())
{
/// Step one character to avoid infinite loop
++match_pos;
- if (match_pos >= static_cast(input.length()))
+ if (match_pos >= haystack_length)
can_finish_current_string = true;
}
}
@@ -151,10 +149,10 @@ struct ReplaceRegexpImpl
/// If ready, append suffix after match to end of string.
if (can_finish_current_string)
{
- res_data.resize(res_data.size() + input.length() - copy_pos);
- memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, input.length() - copy_pos);
- res_offset += input.length() - copy_pos;
- copy_pos = input.length();
+ res_data.resize(res_data.size() + haystack_length - copy_pos);
+ memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], haystack.data() + copy_pos, haystack_length - copy_pos);
+ res_offset += haystack_length - copy_pos;
+ copy_pos = haystack_length;
match_pos = copy_pos;
}
}
@@ -164,12 +162,11 @@ struct ReplaceRegexpImpl
++res_offset;
}
-
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
- const std::string & needle,
- const std::string & replacement,
+ const String & needle,
+ const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
@@ -178,11 +175,19 @@ struct ReplaceRegexpImpl
size_t size = offsets.size();
res_offsets.resize(size);
- typename re2_st::RE2::Options regexp_options;
- /// Never write error messages to stderr. It's ignorant to do it from library code.
+ re2_st::RE2::Options regexp_options;
+ /// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
+
re2_st::RE2 searcher(needle, regexp_options);
- int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures));
+
+ if (!searcher.ok())
+ throw Exception(
+ ErrorCodes::BAD_ARGUMENTS,
+ "The pattern argument is not a valid re2 pattern: {}",
+ searcher.error());
+
+ int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
@@ -190,9 +195,10 @@ struct ReplaceRegexpImpl
for (size_t i = 0; i < size; ++i)
{
size_t from = i > 0 ? offsets[i - 1] : 0;
- re2_st::StringPiece input(reinterpret_cast(data.data() + from), offsets[i] - from - 1);
+ const char * haystack_data = reinterpret_cast(data.data() + from);
+ const size_t haystack_length = static_cast(offsets[i] - from - 1);
- processString(input, res_data, res_offset, searcher, num_captures, instructions);
+ processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
}
}
@@ -200,8 +206,8 @@ struct ReplaceRegexpImpl
static void vectorFixed(
const ColumnString::Chars & data,
size_t n,
- const std::string & needle,
- const std::string & replacement,
+ const String & needle,
+ const String & replacement,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
@@ -210,20 +216,29 @@ struct ReplaceRegexpImpl
res_data.reserve(data.size());
res_offsets.resize(size);
- typename re2_st::RE2::Options regexp_options;
- /// Never write error messages to stderr. It's ignorant to do it from library code.
+ re2_st::RE2::Options regexp_options;
+ /// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
+
re2_st::RE2 searcher(needle, regexp_options);
- int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures));
+
+ if (!searcher.ok())
+ throw Exception(
+ ErrorCodes::BAD_ARGUMENTS,
+ "The pattern argument is not a valid re2 pattern: {}",
+ searcher.error());
+
+ int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < size; ++i)
{
size_t from = i * n;
- re2_st::StringPiece input(reinterpret_cast(data.data() + from), n);
+ const char * haystack_data = reinterpret_cast(data.data() + from);
+ const size_t haystack_length = n;
- processString(input, res_data, res_offset, searcher, num_captures, instructions);
+ processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
}
}
diff --git a/src/Functions/ReplaceStringImpl.h b/src/Functions/ReplaceStringImpl.h
index ab0e53d3c45..1a9ec49c58c 100644
--- a/src/Functions/ReplaceStringImpl.h
+++ b/src/Functions/ReplaceStringImpl.h
@@ -8,9 +8,17 @@
namespace DB
{
+struct ReplaceStringTraits
+{
+ enum class Replace
+ {
+ First,
+ All
+ };
+};
/** Replace one or all occurencies of substring 'needle' to 'replacement'. 'needle' and 'replacement' are constants.
*/
-template
+template
struct ReplaceStringImpl
{
static void vector(
@@ -66,7 +74,7 @@ struct ReplaceStringImpl
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
res_offset += replacement.size();
pos = match + needle.size();
- if (replace_one)
+ if constexpr (replace == ReplaceStringTraits::Replace::First)
can_finish_current_string = true;
}
else
@@ -155,7 +163,7 @@ struct ReplaceStringImpl
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
res_offset += replacement.size();
pos = match + needle.size();
- if (replace_one || pos == begin + n * (i + 1))
+ if (replace == ReplaceStringTraits::Replace::First || pos == begin + n * (i + 1))
can_finish_current_string = true;
}
else
diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
index 60f0219e92d..6962c21280d 100644
--- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
+++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
@@ -87,7 +87,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_
parser,
statement_def.data(),
statement_def.data() + statement_def.size(),
- "in file " + filepath + " from backup " + backup->getName(),
+ "in file " + filepath + " from backup " + backup->getNameForLogging(),
0,
context->getSettingsRef().max_parser_depth);
break;
diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp
index f6943233d44..4060aafe1a3 100644
--- a/src/Functions/base64Decode.cpp
+++ b/src/Functions/base64Decode.cpp
@@ -1,8 +1,7 @@
#include
+
#if USE_BASE64
#include
-#include
-
namespace DB
{
@@ -15,4 +14,5 @@ REGISTER_FUNCTION(Base64Decode)
factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive);
}
}
+
#endif
diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp
index fc06935e0a1..773db7e09d9 100644
--- a/src/Functions/base64Encode.cpp
+++ b/src/Functions/base64Encode.cpp
@@ -1,10 +1,7 @@
-#include
#include
-#include "config.h"
-
#if USE_BASE64
-# include
+#include
namespace DB
{
@@ -17,4 +14,5 @@ REGISTER_FUNCTION(Base64Encode)
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive);
}
}
+
#endif
diff --git a/src/Functions/formatReadableDecimalSize.cpp b/src/Functions/formatReadableDecimalSize.cpp
new file mode 100644
index 00000000000..184b574abdf
--- /dev/null
+++ b/src/Functions/formatReadableDecimalSize.cpp
@@ -0,0 +1,35 @@
+#include
+#include
+
+
+namespace DB
+{
+
+namespace
+{
+ struct Impl
+ {
+ static constexpr auto name = "formatReadableDecimalSize";
+
+ static void format(double value, DB::WriteBuffer & out)
+ {
+ formatReadableSizeWithDecimalSuffix(value, out);
+ }
+ };
+}
+
+REGISTER_FUNCTION(FormatReadableDecimalSize)
+{
+ factory.registerFunction>(
+ {
+ R"(
+Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
+)",
+ Documentation::Examples{
+ {"formatReadableDecimalSize", "SELECT formatReadableDecimalSize(1000)"}},
+ Documentation::Categories{"OtherFunctions"}
+ },
+ FunctionFactory::CaseSensitive);
+}
+
+}
diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp
index 7c5cd82ca5d..d85d192d199 100644
--- a/src/Functions/replaceAll.cpp
+++ b/src/Functions/replaceAll.cpp
@@ -13,7 +13,7 @@ struct NameReplaceAll
static constexpr auto name = "replaceAll";
};
-using FunctionReplaceAll = FunctionStringReplace, NameReplaceAll>;
+using FunctionReplaceAll = FunctionStringReplace, NameReplaceAll>;
}
diff --git a/src/Functions/replaceOne.cpp b/src/Functions/replaceOne.cpp
index c0c21dbf51f..6557339537e 100644
--- a/src/Functions/replaceOne.cpp
+++ b/src/Functions/replaceOne.cpp
@@ -13,7 +13,7 @@ struct NameReplaceOne
static constexpr auto name = "replaceOne";
};
-using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>;
+using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>;
}
diff --git a/src/Functions/replaceRegexpAll.cpp b/src/Functions/replaceRegexpAll.cpp
index 0250b4a5ba6..4eaf46c05d4 100644
--- a/src/Functions/replaceRegexpAll.cpp
+++ b/src/Functions/replaceRegexpAll.cpp
@@ -13,7 +13,7 @@ struct NameReplaceRegexpAll
static constexpr auto name = "replaceRegexpAll";
};
-using FunctionReplaceRegexpAll = FunctionStringReplace, NameReplaceRegexpAll>;
+using FunctionReplaceRegexpAll = FunctionStringReplace, NameReplaceRegexpAll>;
}
diff --git a/src/Functions/replaceRegexpOne.cpp b/src/Functions/replaceRegexpOne.cpp
index b40992b73fc..60e29213a9a 100644
--- a/src/Functions/replaceRegexpOne.cpp
+++ b/src/Functions/replaceRegexpOne.cpp
@@ -13,7 +13,7 @@ struct NameReplaceRegexpOne
static constexpr auto name = "replaceRegexpOne";
};
-using FunctionReplaceRegexpOne = FunctionStringReplace, NameReplaceRegexpOne>;
+using FunctionReplaceRegexpOne = FunctionStringReplace, NameReplaceRegexpOne>;
}
diff --git a/src/Functions/tryBase64Decode.cpp b/src/Functions/tryBase64Decode.cpp
index 1102c7a3418..bd452b8357b 100644
--- a/src/Functions/tryBase64Decode.cpp
+++ b/src/Functions/tryBase64Decode.cpp
@@ -1,7 +1,7 @@
#include
+
#if USE_BASE64
#include
-#include
namespace DB
{
@@ -10,4 +10,5 @@ REGISTER_FUNCTION(TryBase64Decode)
factory.registerFunction>();
}
}
+
#endif
diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index de2b5654ae5..b60fdee1184 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -528,16 +528,17 @@ namespace detail
auto on_retriable_error = [&]()
{
- retry_with_range_header = true;
- impl.reset();
- auto http_session = session->getSession();
- http_session->reset();
- sleepForMilliseconds(milliseconds_to_wait);
+ retry_with_range_header = true;
+ impl.reset();
+ auto http_session = session->getSession();
+ http_session->reset();
+ sleepForMilliseconds(milliseconds_to_wait);
};
for (size_t i = 0; i < settings.http_max_tries; ++i)
{
exception = nullptr;
+ initialization_error = InitializeError::NONE;
try
{
diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp
index f823015bd7d..9ed2c41fd01 100644
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@@ -123,7 +123,10 @@ void WriteBufferFromS3::nextImpl()
void WriteBufferFromS3::allocateBuffer()
{
if (total_parts_uploaded != 0 && total_parts_uploaded % s3_settings.upload_part_size_multiply_parts_count_threshold == 0)
+ {
upload_part_size *= s3_settings.upload_part_size_multiply_factor;
+ upload_part_size = std::min(upload_part_size, s3_settings.max_upload_part_size);
+ }
temporary_buffer = Aws::MakeShared("temporary buffer");
temporary_buffer->exceptions(std::ios::badbit);
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index 182f2292b28..c38006af975 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -2637,7 +2637,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const
{
if (data_variants.empty())
- throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED);
+ throw Exception("Empty data passed to Aggregator::prepareVariantsToMerge.", ErrorCodes::EMPTY_DATA_PASSED);
LOG_TRACE(log, "Merging aggregated data");
diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp
index 3aadea918fb..bf85affcb90 100644
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@@ -37,6 +37,7 @@ namespace ProfileEvents
{
extern const Event AsyncInsertQuery;
extern const Event AsyncInsertBytes;
+ extern const Event FailedAsyncInsertQuery;
}
namespace DB
@@ -101,6 +102,8 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
{
std::lock_guard lock(mutex);
finished = true;
+ if (exception_)
+ ProfileEvents::increment(ProfileEvents::FailedAsyncInsertQuery, 1);
exception = exception_;
cv.notify_all();
}
diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp
index 88d7cedec83..36b2f17e8a1 100644
--- a/src/Interpreters/ClusterDiscovery.cpp
+++ b/src/Interpreters/ClusterDiscovery.cpp
@@ -217,7 +217,7 @@ bool ClusterDiscovery::needUpdate(const Strings & node_uuids, const NodesInfo &
ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
{
- std::vector> shards;
+ std::vector shards;
{
std::map replica_adresses;
@@ -244,7 +244,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
/* password= */ "",
/* clickhouse_port= */ secure ? context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : context->getTCPPort(),
/* treat_local_as_remote= */ false,
- /* treat_local_port_as_remote= */ context->getApplicationType() == Context::ApplicationType::LOCAL,
+ /* treat_local_port_as_remote= */ false, /// should be set only for clickhouse-local, but cluster discovery is not used there
/* secure= */ secure);
return cluster;
}
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 2d609c00406..980e8f6e7b6 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -8,12 +8,14 @@
#include
#include
#include
-#include
-#include
#include
+#include
#include
+#include
+#include
#include
#include
+#include
namespace DB
@@ -168,6 +170,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context)
query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth);
}
+void DDLTaskBase::formatRewrittenQuery(ContextPtr context)
+{
+ /// Convert the rewritten AST back to a string; call this only after `query` has received all of its rewrites.
+ query_str = queryToString(*query);
+ query_for_logging = maskSensitiveInfoInQueryForLogging(query_str, query, context); /// Masked variant of query_str intended for log messages.
+}
+
ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/)
{
auto query_context = Context::createCopy(from_context);
@@ -265,6 +274,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log)
host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name);
}
+ /// Rewrite AST without ON CLUSTER.
WithoutOnClusterASTRewriteParams params;
params.default_database = address_in_cluster.default_database;
params.host_id = address_in_cluster.toString();
@@ -405,6 +415,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context)
chassert(!ddl_query->database);
ddl_query->setDatabase(database->getDatabaseName());
}
+ formatRewrittenQuery(context);
}
ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper)
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 661cee84a45..2043de6701e 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -99,6 +99,9 @@ struct DDLTaskBase
String host_id_str;
ASTPtr query;
+ String query_str;
+ String query_for_logging;
+
bool is_initial_query = false;
bool is_circular_replicated = false;
bool execute_on_leader = false;
@@ -114,6 +117,7 @@ struct DDLTaskBase
virtual ~DDLTaskBase() = default;
virtual void parseQueryFromEntry(ContextPtr context);
+ void formatRewrittenQuery(ContextPtr context);
virtual String getShardID() const = 0;
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 6bfa9ecd591..2e1918e1a37 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -10,8 +10,6 @@
#include
#include
#include
-#include
-#include
#include
#include
#include
@@ -207,6 +205,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
task->parseQueryFromEntry(context);
/// Stage 3.2: check cluster and find the host in cluster
task->setClusterInfo(context, log);
+ /// Stage 3.3: output rewritten query back to string
+ task->formatRewrittenQuery(context);
}
catch (...)
{
@@ -431,11 +431,12 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
return *current_tasks.back();
}
-bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
+bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
/// Add special comment at the start of query to easily identify DDL-produced queries in query_log
String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
- String query_to_execute = query_prefix + query;
+ String query_to_execute = query_prefix + task.query_str;
+ String query_to_show_in_logs = query_prefix + task.query_for_logging;
ReadBufferFromString istr(query_to_execute);
String dummy_string;
@@ -463,7 +464,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
- tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
+ tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
/// if replica has stopped being leader and we should retry query.
@@ -484,7 +485,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
- tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
+ tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
/// so we consider unknown exception as retryable error.
@@ -492,7 +493,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
}
task.execution_status = ExecutionStatus(0);
- LOG_DEBUG(log, "Executed query: {}", query);
+ LOG_DEBUG(log, "Executed query: {}", query_to_show_in_logs);
return true;
}
@@ -514,7 +515,7 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
- LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
+ LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.query_for_logging);
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
@@ -587,8 +588,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
try
{
- String rewritten_query = queryToString(task.query);
- LOG_DEBUG(log, "Executing query: {}", rewritten_query);
+ LOG_DEBUG(log, "Executing query: {}", task.query_for_logging);
StoragePtr storage;
if (auto * query_with_table = dynamic_cast(task.query.get()); query_with_table)
@@ -605,12 +605,12 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
if (task.execute_on_leader)
{
- tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper, execute_on_leader_lock);
+ tryExecuteQueryOnLeaderReplica(task, storage, task.entry_path, zookeeper, execute_on_leader_lock);
}
else
{
storage.reset();
- tryExecuteQuery(rewritten_query, task, zookeeper);
+ tryExecuteQuery(task, zookeeper);
}
}
catch (const Coordination::Exception &)
@@ -694,7 +694,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora
bool DDLWorker::tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
- const String & rewritten_query,
const String & /*node_path*/,
const ZooKeeperPtr & zookeeper,
std::unique_ptr & execute_on_leader_lock)
@@ -793,7 +792,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
/// If the leader will unexpectedly changed this method will return false
/// and on the next iteration new leader will take lock
- if (tryExecuteQuery(rewritten_query, task, zookeeper))
+ if (tryExecuteQuery(task, zookeeper))
{
executed_by_us = true;
break;
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index 5aea460ad2e..65ef4b440a1 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -101,12 +101,11 @@ protected:
bool tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
- const String & rewritten_query,
const String & node_path,
const ZooKeeperPtr & zookeeper,
std::unique_ptr & execute_on_leader_lock);
- bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
+ bool tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
/// Checks and cleanups queue's nodes
void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
diff --git a/src/Interpreters/InDepthNodeVisitor.h b/src/Interpreters/InDepthNodeVisitor.h
index 785c88ec77b..9c4fed56fd4 100644
--- a/src/Interpreters/InDepthNodeVisitor.h
+++ b/src/Interpreters/InDepthNodeVisitor.h
@@ -25,13 +25,47 @@ public:
{}
void visit(T & ast)
+ {
+ if (ostr)
+ visitImpl* with_dump= */ true>(ast);
+ else
+ visitImpl* with_dump= */ false>(ast);
+ }
+
+private:
+ Data & data;
+ size_t visit_depth;
+ WriteBuffer * ostr;
+
+ template
+ void visitImpl(T & ast)
{
checkStackSize();
- DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
+ if constexpr (with_dump)
+ {
+ DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
+ visitImplMain* with_dump= */ true>(ast);
+ }
+ else
+ {
+ visitImplMain* with_dump= */ false>(ast);
+ }
+ }
+ template
+ void visitImplMain(T & ast)
+ {
if constexpr (!_top_to_bottom)
- visitChildren(ast);
+ visitChildren(ast);
+ doVisit(ast);
+
+ if constexpr (_top_to_bottom)
+ visitChildren(ast);
+ }
+
+ void doVisit(T & ast)
+ {
try
{
Matcher::visit(ast, data);
@@ -41,16 +75,9 @@ public:
e.addMessage("While processing {}", ast->formatForErrorMessage());
throw;
}
-
- if constexpr (_top_to_bottom)
- visitChildren(ast);
}
-private:
- Data & data;
- size_t visit_depth;
- WriteBuffer * ostr;
-
+ template
void visitChildren(T & ast)
{
for (auto & child : ast->children)
@@ -62,7 +89,7 @@ private:
need_visit_child = Matcher::needChildVisit(ast, child);
if (need_visit_child)
- visit(child);
+ visitImpl(child);
}
}
};
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 4a6ce63eb84..e9cf06c5c69 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -726,7 +726,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
else if (create.as_table_function)
{
/// Table function without columns list.
- auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext());
+ auto table_function_ast = create.as_table_function->ptr();
+ auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
properties.columns = table_function->getActualTableStructure(getContext());
}
else if (create.is_dictionary)
@@ -967,7 +968,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (as_create.storage)
create.set(create.storage, as_create.storage->ptr());
else if (as_create.as_table_function)
- create.as_table_function = as_create.as_table_function->clone();
+ create.set(create.as_table_function, as_create.as_table_function->ptr());
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
@@ -1343,12 +1344,12 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// NOTE: CREATE query may be rewritten by Storage creator or table function
if (create.as_table_function)
{
- const auto & factory = TableFunctionFactory::instance();
- auto table_func = factory.get(create.as_table_function, getContext());
+ auto table_function_ast = create.as_table_function->ptr();
+ auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
/// In case of CREATE AS table_function() query we should use global context
/// in storage creation because there will be no query context on server startup
/// and because storage lifetime is bigger than query context lifetime.
- res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
+ res = table_function->execute(table_function_ast, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid});
}
else
diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
index 8e515caace4..5879c96f7b3 100644
--- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
+++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
@@ -546,10 +546,13 @@ std::vector normalizeColumnNamesExtractNeeded(
{
auto alias = aliases.find(ident->name())->second;
auto alias_ident = alias->clone();
- alias_ident->as()->restoreTable();
- bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
- if (!alias_equals_column_name)
- throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
+ if (auto * alias_ident_typed = alias_ident->as())
+ {
+ alias_ident_typed->restoreTable();
+ bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
+ if (!alias_equals_column_name)
+ throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
+ }
}
String short_name = ident->shortName();
String original_long_name;
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 479a6b38e8e..087f3fd8887 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -34,7 +34,6 @@
#include
#include
#include
-#include
#include
#include
@@ -56,9 +55,9 @@
#include
#include
#include
+#include
#include
-#include
#include
#include
@@ -77,7 +76,6 @@
namespace ProfileEvents
{
- extern const Event QueryMaskingRulesMatch;
extern const Event FailedQuery;
extern const Event FailedInsertQuery;
extern const Event FailedSelectQuery;
@@ -109,37 +107,6 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
}
-/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
-/// The parameter `parsed query` can be nullptr if the query cannot be parsed.
-static String prepareQueryForLogging(const String & query, const ASTPtr & parsed_query, ContextPtr context)
-{
- String res = query;
-
- // Wiping a password or hash from CREATE/ALTER USER query because we don't want it to go to logs.
- if (parsed_query && canContainPassword(*parsed_query))
- {
- ASTPtr ast_for_logging = parsed_query->clone();
- wipePasswordFromQuery(ast_for_logging);
- res = serializeAST(*ast_for_logging);
- }
-
- // Wiping sensitive data before cropping query by log_queries_cut_to_length,
- // otherwise something like credit card without last digit can go to log.
- if (auto * masker = SensitiveDataMasker::getInstance())
- {
- auto matches = masker->wipeSensitiveData(res);
- if (matches > 0)
- {
- ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
- }
- }
-
- res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
-
- return res;
-}
-
-
/// Log query into text log (not into system table).
static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage)
{
@@ -425,14 +392,14 @@ static std::tuple executeQueryImpl(
/// MUST go before any modification (except for prepared statements,
/// since it substitute parameters and without them query does not contain
/// parameters), to keep query as-is in query_log and server log.
- query_for_logging = prepareQueryForLogging(query, ast, context);
+ query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
}
catch (...)
{
/// Anyway log the query.
if (query.empty())
query.assign(begin, std::min(end - begin, static_cast(max_query_size)));
- query_for_logging = prepareQueryForLogging(query, ast, context);
+ query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
logQuery(query_for_logging, context, internal, stage);
diff --git a/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp b/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp
new file mode 100644
index 00000000000..c69f91394b9
--- /dev/null
+++ b/src/Interpreters/maskSensitiveInfoInQueryForLogging.cpp
@@ -0,0 +1,623 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace ProfileEvents
+{
+ extern const Event QueryMaskingRulesMatch;
+}
+
+
+namespace DB
+{
+
+namespace
+{
+ enum class PasswordWipingMode /// Selects how ASTFunction nodes are interpreted by PasswordWipingVisitor::visit().
+ {
+ Query, /// A whole query: functions are dispatched by name in visitFunction().
+ BackupName, /// A backup name expression: every function is handled as a backup engine, e.g. S3(...).
+ };
+
+
+ template
+ class PasswordWipingVisitor
+ {
+ public:
+ struct Data /// Mutable state shared by all visit*() and wipe*() helpers during one traversal.
+ {
+ bool can_contain_password = false; /// Set in check_only mode once a node that may hold a password is found.
+ bool password_was_hidden = false; /// Set in wiping mode once an argument was replaced or removed.
+ bool is_create_table_query = false; /// Set by visitCreateQuery(); makes visit() treat ASTStorage as a table engine.
+ bool is_create_database_query = false; /// Set by visitCreateQuery(); makes visit() treat ASTStorage as a database engine.
+ bool is_create_dictionary_query = false; /// Set by visitCreateQuery(); enables visitDictionaryDef().
+ ContextPtr context; /// Used to evaluate constant arguments; filled by wipePasswordFromQuery() only, canContainPassword() leaves it empty.
+ PasswordWipingMode mode = PasswordWipingMode::Query; /// How ASTFunction nodes are interpreted, see visit().
+ };
+
+ using Visitor = std::conditional_t<
+ check_only,
+ ConstInDepthNodeVisitor,
+ InDepthNodeVisitor>;
+
+ static bool needChildVisit(const ASTPtr & /* ast */, const ASTPtr & /* child */, Data & data)
+ { /// Tells the in-depth visitor whether it should descend into a child node.
+ if constexpr (check_only)
+ {
+ return !data.can_contain_password; /// One candidate is enough for the check, so stop descending after it.
+ }
+ else
+ {
+ return true; /// Wiping has to reach every node of the tree.
+ }
+ }
+
+ static void visit(ASTPtr ast, Data & data)
+ { /// Dispatches one AST node to the matching handler; node types without a handler are ignored.
+ if (auto * create_user_query = ast->as())
+ {
+ visitCreateUserQuery(*create_user_query, data);
+ }
+ else if (auto * create_query = ast->as())
+ {
+ visitCreateQuery(*create_query, data); /// Only records the kind of CREATE query in `data` for the branches below.
+ }
+ else if (auto * backup_query = ast->as())
+ {
+ visitBackupQuery(*backup_query, data);
+ }
+ else if (auto * storage = ast->as())
+ {
+ if (data.is_create_table_query) /// NOTE(review): relies on the CREATE node having been visited before its storage child - confirm visitor order.
+ visitTableEngine(*storage, data);
+ else if (data.is_create_database_query)
+ visitDatabaseEngine(*storage, data);
+ }
+ else if (auto * dictionary = ast->as())
+ {
+ if (data.is_create_dictionary_query)
+ visitDictionaryDef(*dictionary, data);
+ }
+ else if (auto * function = ast->as())
+ {
+ if (data.mode == PasswordWipingMode::BackupName) /// In a backup name every function is a backup engine.
+ wipePasswordFromBackupEngineArguments(*function, data);
+ else
+ visitFunction(*function, data);
+ }
+ }
+
+ private:
+ static void visitCreateUserQuery(ASTCreateUserQuery & query, Data & data)
+ { /// Handles CREATE/ALTER USER: hides the authentication data if it carries a password.
+ if (!query.auth_data)
+ return; /// No authentication clause in the query.
+
+ auto auth_type = query.auth_data->getType();
+ if (auth_type == AuthenticationType::NO_PASSWORD || auth_type == AuthenticationType::LDAP
+ || auth_type == AuthenticationType::KERBEROS || auth_type == AuthenticationType::SSL_CERTIFICATE)
+ return; /// These authentication types carry no password, nothing to hide.
+
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true;
+ return;
+ }
+
+ query.show_password = false; /// Presumably makes the formatter omit the password - confirm in ASTCreateUserQuery.
+ data.password_was_hidden = true;
+ }
+
+ static void visitCreateQuery(ASTCreateQuery & query, Data & data)
+ { /// Records which kind of CREATE query this is; exactly one flag is set and nothing is wiped here.
+ if (query.is_dictionary) /// Checked first, so a dictionary is never classified as a table.
+ data.is_create_dictionary_query = true;
+ else if (query.table)
+ data.is_create_table_query = true;
+ else if (query.database) /// Reached only when no table is set.
+ data.is_create_database_query = true;
+ }
+
+ static void visitTableEngine(ASTStorage & storage, Data & data)
+ { /// Hides the password argument of table engines known to take credentials; other engines are left untouched.
+ if (!storage.engine)
+ return;
+
+ const String & engine_name = storage.engine->name; /// Compared case-sensitively below.
+
+ if (engine_name == "ExternalDistributed")
+ {
+ /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - password has zero-based index 5.
+ wipePasswordFromArgument(*storage.engine, data, 5);
+ }
+ else if (engine_name == "MySQL")
+ {
+ /// MySQL('host:port', 'database', 'table', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(*storage.engine, data, 4);
+ }
+ else if (engine_name == "PostgreSQL")
+ {
+ /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(*storage.engine, data, 4);
+ }
+ else if (engine_name == "MaterializedPostgreSQL")
+ {
+ /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(*storage.engine, data, 4);
+ }
+ else if (engine_name == "MongoDB")
+ {
+ /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(*storage.engine, data, 4);
+ }
+ else if (engine_name == "S3" || engine_name == "COSN")
+ {
+ /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - credentials are optional, so the argument count is checked first.
+ wipePasswordFromS3TableEngineArguments(*storage.engine, data);
+ }
+ }
+
+ static void wipePasswordFromS3TableEngineArguments(ASTFunction & engine, Data & data)
+ {
+ /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
+
+ /// But we should check the number of arguments first because we don't need to do any replacements in case of
+ /// S3('url' [, 'format' [, 'compression']])
+ size_t num_arguments;
+ if (!tryGetNumArguments(engine, &num_arguments) || (num_arguments < 4))
+ return; /// Fewer than 4 arguments means no credentials were passed.
+
+ wipePasswordFromArgument(engine, data, 2); /// Zero-based index 2 is 'aws_secret_access_key'.
+ }
+
+ static void visitDatabaseEngine(ASTStorage & storage, Data & data)
+ { /// Hides the password argument of database engines known to take credentials; other engines are left untouched.
+ if (!storage.engine)
+ return;
+
+ const String & engine_name = storage.engine->name; /// Compared case-sensitively below.
+
+ if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL")
+ {
+ /// MySQL('host:port', 'database', 'user', 'password') - password has zero-based index 3.
+ wipePasswordFromArgument(*storage.engine, data, 3);
+ }
+ else if (engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL")
+ {
+ /// PostgreSQL('host:port', 'database', 'user', 'password', ...) - password has zero-based index 3.
+ wipePasswordFromArgument(*storage.engine, data, 3);
+ }
+ }
+
+ static void visitFunction(ASTFunction & function, Data & data)
+ { /// Hides secrets in calls of functions known to take them; the name is matched case-sensitively, other functions are ignored.
+ if (function.name == "mysql")
+ {
+ /// mysql('host:port', 'database', 'table', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(function, data, 4);
+ }
+ else if (function.name == "postgresql")
+ {
+ /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(function, data, 4);
+ }
+ else if (function.name == "mongodb")
+ {
+ /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - password has zero-based index 4.
+ wipePasswordFromArgument(function, data, 4);
+ }
+ else if (function.name == "s3" || function.name == "cosn")
+ {
+ /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
+ wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false);
+ }
+ else if (function.name == "s3Cluster")
+ {
+ /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
+ wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ true);
+ }
+ else if (function.name == "remote" || function.name == "remoteSecure")
+ {
+ /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - the password position varies, see the helper.
+ wipePasswordFromRemoteFunctionArguments(function, data);
+ }
+ else if (
+ function.name == "encrypt" || function.name == "decrypt" || function.name == "aes_encrypt_mysql"
+ || function.name == "aes_decrypt_mysql" || function.name == "tryDecrypt")
+ {
+ /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - everything after 'mode' is hidden.
+ wipePasswordFromEncryptionFunctionArguments(function, data);
+ }
+ }
+
+ static void wipePasswordFromS3FunctionArguments(ASTFunction & function, Data & data, bool is_cluster_function)
+ {
+ /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
+ size_t url_arg_idx = is_cluster_function ? 1 : 0;
+
+ /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
+ /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
+ /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
+
+ /// But we should check the number of arguments first because we don't need to do any replacements in case of
+ /// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format'])
+ size_t num_arguments;
+ if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < url_arg_idx + 3))
+ return;
+
+ if (num_arguments >= url_arg_idx + 5)
+ {
+ /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...)
+ wipePasswordFromArgument(function, data, url_arg_idx + 2);
+ }
+ else
+ {
+ /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
+ /// We need to distinguish that from s3('url', 'format', 'structure' [, 'compression_method']).
+ /// So we will check whether the argument after 'url' is a format.
+ String format;
+ if (!tryGetEvaluatedConstStringFromArgument(function, data, url_arg_idx + 1, &format)) /// In check_only mode this only flags the query and returns false.
+ return;
+
+ if (FormatFactory::instance().getAllFormats().contains(format))
+ return; /// The argument after 'url' is a format: s3('url', 'format', ...)
+
+ /// The argument after 'url' is not a format so we do our replacement:
+ /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...)
+ wipePasswordFromArgument(function, data, url_arg_idx + 2);
+ }
+ }
+
+ static void wipePasswordFromRemoteFunctionArguments(ASTFunction & function, Data & data)
+ {
+ /// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
+ /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
+ /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
+ /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
+
+ /// But we should check the number of arguments first because we don't need to do any replacements in case of
+ /// remote('addresses_expr', db.table)
+ size_t num_arguments;
+ if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < 3))
+ return;
+
+ auto & arguments = assert_cast(*function.arguments).children; /// The cast relies on tryGetNumArguments() having checked the arguments node above.
+ size_t arg_num = 1; /// Index 0 is 'addresses_expr'; start at the argument after it.
+
+ /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
+ const auto * table_function = arguments[arg_num]->as();
+ if (table_function && TableFunctionFactory::instance().isTableFunctionName(table_function->name))
+ {
+ ++arg_num;
+ }
+ else
+ {
+ String database;
+ if (!tryGetEvaluatedConstDatabaseNameFromArgument(function, data, arg_num, &database)) /// In check_only mode this only flags the query and returns false.
+ return;
+ ++arg_num;
+
+ auto qualified_name = QualifiedTableName::parseFromString(database);
+ if (qualified_name.database.empty()) /// The argument was a bare 'db', so a separate 'table' argument follows.
+ ++arg_num; /// skip 'table' argument
+ }
+
+ /// Check if username and password are specified
+ /// (sharding_key can be of any type so while we're getting string literals they're username & password).
+ String username, password;
+ bool username_specified = tryGetStringFromArgument(function, arg_num, &username);
+ bool password_specified = username_specified && tryGetStringFromArgument(function, arg_num + 1, &password);
+
+ if (password_specified)
+ {
+ /// Password is specified so we do our replacement:
+ /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
+ wipePasswordFromArgument(function, data, arg_num + 1);
+ }
+ }
+
+ static void wipePasswordFromEncryptionFunctionArguments(ASTFunction & function, Data & data)
+ {
+ /// We replace all arguments after 'mode' with a single '[HIDDEN]':
+ /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
+ wipePasswordFromArgument(function, data, 1); /// Replace the argument right after 'mode' (zero-based index 1).
+ removeArgumentsAfter(function, data, 2); /// Keep only the first two arguments, dropping key, iv and aad.
+ }
+
+ static void visitBackupQuery(ASTBackupQuery & query, Data & data)
+ { /// Hides credentials in both the backup name and the base backup name of a backup query.
+ if (query.backup_name)
+ {
+ if (auto * backup_engine = query.backup_name->as()) /// Only names written as a function call, e.g. S3(...), are handled.
+ wipePasswordFromBackupEngineArguments(*backup_engine, data);
+ }
+
+ if (query.base_backup_name)
+ {
+ if (auto * base_backup_engine = query.base_backup_name->as()) /// Same treatment for the base backup.
+ wipePasswordFromBackupEngineArguments(*base_backup_engine, data);
+ }
+ }
+
+ static void wipePasswordFromBackupEngineArguments(ASTFunction & engine, Data & data)
+ { /// Hides the secret key of a backup engine; only "S3" is handled, other engines are left untouched.
+ if (engine.name == "S3")
+ {
+ /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) - the secret key has zero-based index 2.
+ wipePasswordFromArgument(engine, data, 2); /// Does nothing when fewer than 3 arguments are given.
+ }
+ }
+
+ static void wipePasswordFromArgument(ASTFunction & function, Data & data, size_t arg_idx)
+ { /// Replaces the argument at zero-based index arg_idx with the literal '[HIDDEN]'; does nothing if there is no such argument.
+ if (!function.arguments)
+ return;
+
+ auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return; /// return because we don't want to validate query here
+
+ auto & arguments = expr_list->children;
+ if (arg_idx >= arguments.size())
+ return; /// The function was called with fewer arguments, so there is nothing to hide.
+
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true; /// Only report that a replacement would happen; the AST is not modified.
+ return;
+ }
+
+ arguments[arg_idx] = std::make_shared("[HIDDEN]");
+ data.password_was_hidden = true;
+ }
+
+ static void removeArgumentsAfter(ASTFunction & function, Data & data, size_t new_num_arguments)
+ { /// Truncates the argument list to its first new_num_arguments entries; does nothing if it is already that short.
+ if (!function.arguments)
+ return;
+
+ auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return; /// return because we don't want to validate query here
+
+ auto & arguments = expr_list->children;
+ if (new_num_arguments >= arguments.size())
+ return; /// Nothing to remove.
+
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true; /// Only report that arguments would be removed; the AST is not modified.
+ return;
+ }
+
+ arguments.resize(new_num_arguments);
+ data.password_was_hidden = true;
+ }
+
+ static bool tryGetNumArguments(const ASTFunction & function, size_t * num_arguments)
+ { /// Stores the number of arguments in *num_arguments and returns true; returns false and leaves it untouched if there is no argument list.
+ if (!function.arguments)
+ return false;
+
+ auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return false; /// return false because we don't want to validate query here
+
+ const auto & arguments = expr_list->children;
+ *num_arguments = arguments.size();
+ return true;
+ }
+
+ static bool tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, String * value)
+ { /// Stores the argument at arg_idx in *value and returns true, but only if that argument is a string literal; nothing is evaluated.
+ if (!function.arguments)
+ return false;
+
+ const auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return false; /// return false because we don't want to validate query here
+
+ const auto & arguments = expr_list->children;
+ if (arg_idx >= arguments.size())
+ return false;
+
+ const auto * literal = arguments[arg_idx]->as();
+ if (!literal || literal->value.getType() != Field::Types::String)
+ return false; /// Not a literal, or a literal of a non-string type.
+
+ *value = literal->value.safeGet();
+ return true;
+ }
+
+ static bool tryGetEvaluatedConstStringFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
+ { /// Evaluates the argument at arg_idx as a constant and stores it in *value if it is a string; returns false on any failure.
+ if (!function.arguments)
+ return false;
+
+ const auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return false; /// return false because we don't want to validate query here
+
+ const auto & arguments = expr_list->children;
+ if (arg_idx >= arguments.size())
+ return false;
+
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true; /// Nothing is evaluated during the check; the query is flagged and decided during wiping.
+ return false;
+ }
+
+ ASTPtr argument = arguments[arg_idx]; /// Local copy of the pointer: the evaluated result never replaces the argument in the query.
+ try
+ {
+ argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, data.context);
+ }
+ catch (...)
+ {
+ return false; /// Best effort: an argument that cannot be evaluated is treated as "not a constant string".
+ }
+
+ const auto & literal = assert_cast(*argument); /// Assumes the evaluation above always yields a literal - TODO confirm.
+ if (literal.value.getType() != Field::Types::String)
+ return false;
+
+ *value = literal.value.safeGet();
+ return true;
+ }
+
+ static bool tryGetEvaluatedConstDatabaseNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
+ { /// Evaluates the argument at arg_idx as a database name and stores it in *value if it is a string; returns false on any failure.
+ if (!function.arguments)
+ return false;
+
+ const auto * expr_list = function.arguments->as();
+ if (!expr_list)
+ return false; /// return false because we don't want to validate query here
+
+ const auto & arguments = expr_list->children;
+ if (arg_idx >= arguments.size())
+ return false;
+
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true; /// Nothing is evaluated during the check; the query is flagged and decided during wiping.
+ return false;
+ }
+
+ ASTPtr argument = arguments[arg_idx]; /// Local copy of the pointer: the evaluated result never replaces the argument in the query.
+ try
+ {
+ argument = evaluateConstantExpressionForDatabaseName(argument, data.context);
+ }
+ catch (...)
+ {
+ return false; /// Best effort: an argument that cannot be evaluated is treated as "not a database name".
+ }
+
+ const auto & literal = assert_cast(*argument); /// Assumes the evaluation above always yields a literal - TODO confirm.
+ if (literal.value.getType() != Field::Types::String)
+ return false;
+
+ *value = literal.value.safeGet();
+ return true;
+ }
+
+ static void visitDictionaryDef(ASTDictionary & dictionary, Data & data)
+ { /// Hides every `password` value in the SOURCE(...) clause of a dictionary definition.
+ if (!dictionary.source || !dictionary.source->elements)
+ return;
+
+ const auto * expr_list = dictionary.source->elements->as();
+ if (!expr_list)
+ return; /// return because we don't want to validate query here
+
+ const auto & elements = expr_list->children;
+
+ /// We replace the password in the dictionary's definition:
+ /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password 'qwe123' db 'default' table 'ids')) ->
+ /// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password '[HIDDEN]' db 'default' table 'ids'))
+ for (const auto & element : elements)
+ {
+ auto * pair = element->as();
+ if (!pair)
+ continue; /// just skip because we don't want to validate query here
+
+ if (pair->first == "password") /// The key is compared as an exact, case-sensitive match.
+ {
+ if constexpr (check_only)
+ {
+ data.can_contain_password = true;
+ return; /// One match is enough for the check.
+ }
+ pair->set(pair->second, std::make_shared("[HIDDEN]"));
+ data.password_was_hidden = true;
+ }
+ }
+ }
+ };
+
+ /// Runs the check-only visitor over the AST and returns true if wiping might change something (i.e. it can contain a password).
+ bool canContainPassword(const IAST & ast, PasswordWipingMode mode)
+ {
+ using WipingVisitor = PasswordWipingVisitor*check_only= */ true>;
+ WipingVisitor::Data data; /// `context` stays empty: the check-only helpers return before evaluating any constant.
+ data.mode = mode;
+ WipingVisitor::Visitor visitor{data};
+ ASTPtr ast_ptr = std::const_pointer_cast(ast.shared_from_this()); /// visit() takes an ASTPtr; the check-only helpers return before modifying the AST.
+ visitor.visit(ast_ptr);
+ return data.can_contain_password;
+ }
+
+ /// Removes a password or its hash from a query if it's specified there, or replaces it with a placeholder; modifies `ast` in place.
+ /// Used to prepare a query for storing in logs (we don't want logs to contain sensitive information). Returns true if anything was hidden.
+ bool wipePasswordFromQuery(ASTPtr ast, PasswordWipingMode mode, const ContextPtr & context)
+ {
+ using WipingVisitor = PasswordWipingVisitor*check_only= */ false>;
+ WipingVisitor::Data data;
+ data.context = context; /// Needed by the helpers that evaluate constant arguments.
+ data.mode = mode;
+ WipingVisitor::Visitor visitor{data};
+ visitor.visit(ast);
+ return data.password_was_hidden;
+ }
+
+ /// Common utility for masking sensitive information: wipes passwords via the AST, applies SensitiveDataMasker rules, then crops the result.
+ String maskSensitiveInfoImpl(const String & query, const ASTPtr & parsed_query, PasswordWipingMode mode, const ContextPtr & context)
+ {
+ String res = query; /// Returned as is (apart from masking rules and cropping) when nothing is wiped from the AST.
+
+ // Wiping a password or hash from the query because we don't want it to go to logs.
+ if (parsed_query && canContainPassword(*parsed_query, mode)) /// The check runs first so the AST is cloned only when needed.
+ {
+ ASTPtr ast_without_password = parsed_query->clone(); /// Wipe a copy; the caller's AST stays intact.
+ if (wipePasswordFromQuery(ast_without_password, mode, context))
+ res = serializeAST(*ast_without_password); /// Re-serialize only when something was actually hidden.
+ }
+
+ // Wiping sensitive data before cropping query by log_queries_cut_to_length,
+ // otherwise something like credit card without last digit can go to log.
+ if (auto * masker = SensitiveDataMasker::getInstance())
+ {
+ auto matches = masker->wipeSensitiveData(res);
+ if (matches > 0)
+ {
+ ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
+ }
+ }
+
+ res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length); /// `context` is dereferenced unconditionally, so it must not be null.
+
+ return res;
+ }
+}
+
+
+String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context)
+{ /// Public entry point for whole queries; `parsed_query` may be null, in which case only masking rules and cropping apply.
+ return maskSensitiveInfoImpl(query, parsed_query, PasswordWipingMode::Query, context);
+}
+
+
+String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context)
+{ /// Public entry point for backup names: functions in `ast` are treated as backup engines (BackupName mode).
+ return maskSensitiveInfoImpl(backup_name, ast, PasswordWipingMode::BackupName, context);
+}
+
+}
diff --git a/src/Interpreters/maskSensitiveInfoInQueryForLogging.h b/src/Interpreters/maskSensitiveInfoInQueryForLogging.h
new file mode 100644
index 00000000000..3892f89bc52
--- /dev/null
+++ b/src/Interpreters/maskSensitiveInfoInQueryForLogging.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include
+#include
+
+
+namespace DB
+{
+
+/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
+/// The parameter `parsed_query` is allowed to be nullptr if the query cannot be parsed.
+/// Does not validate AST, works in a best-effort way.
+String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context);
+
+/// Makes a version of backup name without sensitive information (e.g. passwords) for logging.
+/// Does not validate AST, works in a best-effort way.
+String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context);
+
+}
diff --git a/src/Parsers/ASTBackupQuery.cpp b/src/Parsers/ASTBackupQuery.cpp
index 4af95b96ee3..567b52b5669 100644
--- a/src/Parsers/ASTBackupQuery.cpp
+++ b/src/Parsers/ASTBackupQuery.cpp
@@ -245,7 +245,21 @@ String ASTBackupQuery::getID(char) const
ASTPtr ASTBackupQuery::clone() const
{
-    return std::make_shared<ASTBackupQuery>(*this);
+    auto res = std::make_shared<ASTBackupQuery>(*this);
+
+    /// The copy constructor only copies the child pointers, so `res` would share them
+    /// with `*this`. Clone every child that is set to get an independent deep copy.
+    if (backup_name)
+        res->backup_name = backup_name->clone();
+
+    if (base_backup_name)
+        res->base_backup_name = base_backup_name->clone();
+
+    if (cluster_host_ids)
+        res->cluster_host_ids = cluster_host_ids->clone();
+
+    if (settings)
+        res->settings = settings->clone();
+
+    return res;
}
diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index f8853d21178..d7dc4e217b7 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -210,6 +210,8 @@ ASTPtr ASTCreateQuery::clone() const
res->set(res->dictionary, dictionary->clone());
}
+ if (as_table_function)
+ res->set(res->as_table_function, as_table_function->clone());
if (comment)
res->set(res->comment, comment->clone());
diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h
index de0f187f0e2..41083c688ad 100644
--- a/src/Parsers/ASTCreateQuery.h
+++ b/src/Parsers/ASTCreateQuery.h
@@ -83,7 +83,7 @@ public:
ASTPtr lateness_function;
String as_database;
String as_table;
- ASTPtr as_table_function;
+ IAST * as_table_function = nullptr;
ASTSelectWithUnionQuery * select = nullptr;
IAST * comment = nullptr;
diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp
index 0bb6872e3af..56abedf5235 100644
--- a/src/Parsers/Access/ASTCreateQuotaQuery.cpp
+++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp
@@ -141,7 +141,12 @@ String ASTCreateQuotaQuery::getID(char) const
ASTPtr ASTCreateQuotaQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (roles)
+ res->roles = std::static_pointer_cast(roles->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp
index 29e78d710cf..d624b9a9157 100644
--- a/src/Parsers/Access/ASTCreateRoleQuery.cpp
+++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp
@@ -42,7 +42,12 @@ String ASTCreateRoleQuery::getID(char) const
ASTPtr ASTCreateRoleQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (settings)
+ res->settings = std::static_pointer_cast(settings->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp
index d968fdd3250..ca888be2cfe 100644
--- a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp
+++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp
@@ -124,7 +124,25 @@ String ASTCreateRowPolicyQuery::getID(char) const
ASTPtr ASTCreateRowPolicyQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (names)
+ res->names = std::static_pointer_cast(names->clone());
+
+ if (roles)
+ res->roles = std::static_pointer_cast(roles->clone());
+
+ /// `res->filters` is already initialized by the copy constructor of ASTCreateRowPolicyQuery (see the first line of this function).
+ /// But the copy constructor just copied the pointers inside `filters` instead of cloning.
+ /// We need to make a deep copy and not a shallow copy, so we have to manually clone each pointer in `res->filters`.
+ chassert(res->filters.size() == filters.size());
+ for (auto & [_, res_filter] : res->filters)
+ {
+ if (res_filter)
+ res_filter = res_filter->clone();
+ }
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp
index d9385e6be7b..56ddef433ef 100644
--- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp
+++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp
@@ -49,7 +49,15 @@ String ASTCreateSettingsProfileQuery::getID(char) const
ASTPtr ASTCreateSettingsProfileQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (to_roles)
+ res->to_roles = std::static_pointer_cast(to_roles->clone());
+
+ if (settings)
+ res->settings = std::static_pointer_cast(settings->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp
index 0f7d0810fba..b4eaf08856e 100644
--- a/src/Parsers/Access/ASTCreateUserQuery.cpp
+++ b/src/Parsers/Access/ASTCreateUserQuery.cpp
@@ -275,7 +275,24 @@ String ASTCreateUserQuery::getID(char) const
ASTPtr ASTCreateUserQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (names)
+ res->names = std::static_pointer_cast(names->clone());
+
+ if (default_roles)
+ res->default_roles = std::static_pointer_cast(default_roles->clone());
+
+ if (default_database)
+ res->default_database = std::static_pointer_cast(default_database->clone());
+
+ if (grantees)
+ res->grantees = std::static_pointer_cast(grantees->clone());
+
+ if (settings)
+ res->settings = std::static_pointer_cast(settings->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp
index 22b30d47ffa..88f2d7bce63 100644
--- a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp
+++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp
@@ -29,7 +29,12 @@ String ASTDropAccessEntityQuery::getID(char) const
ASTPtr ASTDropAccessEntityQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (row_policy_names)
+ res->row_policy_names = std::static_pointer_cast(row_policy_names->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp
index 99dc119087c..1d15fc272cf 100644
--- a/src/Parsers/Access/ASTGrantQuery.cpp
+++ b/src/Parsers/Access/ASTGrantQuery.cpp
@@ -96,7 +96,15 @@ String ASTGrantQuery::getID(char) const
ASTPtr ASTGrantQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (roles)
+ res->roles = std::static_pointer_cast(roles->clone());
+
+ if (grantees)
+ res->grantees = std::static_pointer_cast(grantees->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTSetRoleQuery.cpp b/src/Parsers/Access/ASTSetRoleQuery.cpp
index c886da1c8b5..c26a7f18661 100644
--- a/src/Parsers/Access/ASTSetRoleQuery.cpp
+++ b/src/Parsers/Access/ASTSetRoleQuery.cpp
@@ -14,7 +14,15 @@ String ASTSetRoleQuery::getID(char) const
ASTPtr ASTSetRoleQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (roles)
+ res->roles = std::static_pointer_cast(roles->clone());
+
+ if (to_users)
+ res->to_users = std::static_pointer_cast(to_users->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp
index e92af22f14f..12eda260712 100644
--- a/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp
+++ b/src/Parsers/Access/ASTShowCreateAccessEntityQuery.cpp
@@ -38,7 +38,12 @@ String ASTShowCreateAccessEntityQuery::getID(char) const
ASTPtr ASTShowCreateAccessEntityQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (row_policy_names)
+ res->row_policy_names = std::static_pointer_cast(row_policy_names->clone());
+
+ return res;
}
diff --git a/src/Parsers/Access/ASTShowGrantsQuery.cpp b/src/Parsers/Access/ASTShowGrantsQuery.cpp
index 5d54cf45dc1..2b252617578 100644
--- a/src/Parsers/Access/ASTShowGrantsQuery.cpp
+++ b/src/Parsers/Access/ASTShowGrantsQuery.cpp
@@ -14,7 +14,12 @@ String ASTShowGrantsQuery::getID(char) const
ASTPtr ASTShowGrantsQuery::clone() const
{
- return std::make_shared(*this);
+ auto res = std::make_shared(*this);
+
+ if (for_roles)
+ res->for_roles = std::static_pointer_cast(for_roles->clone());
+
+ return res;
}
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 77540141b53..bf305ba4781 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -640,9 +640,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
auto query = std::make_shared();
node = query;
- if (as_table_function)
- query->as_table_function = as_table_function;
-
query->attach = attach;
query->replace_table = replace;
query->create_or_replace = or_replace;
@@ -661,6 +658,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
+ query->set(query->as_table_function, as_table_function);
if (comment)
query->set(query->comment, comment);
diff --git a/src/Parsers/wipePasswordFromQuery.cpp b/src/Parsers/wipePasswordFromQuery.cpp
deleted file mode 100644
index d2bc2fea645..00000000000
--- a/src/Parsers/wipePasswordFromQuery.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include
-#include
-#include
-
-
-namespace DB
-{
-
-bool canContainPassword(const IAST & ast)
-{
- return ast.as();
-}
-
-void wipePasswordFromQuery(ASTPtr ast)
-{
- if (auto * create_query = ast->as())
- {
- create_query->show_password = false;
- }
-}
-
-}
diff --git a/src/Parsers/wipePasswordFromQuery.h b/src/Parsers/wipePasswordFromQuery.h
deleted file mode 100644
index 57e449cce3b..00000000000
--- a/src/Parsers/wipePasswordFromQuery.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include
-
-
-namespace DB
-{
-
-/// Checks the type of a specified AST and returns true if it can contain a password.
-bool canContainPassword(const IAST & ast);
-
-/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder.
-/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
-/// The function changes only following types of queries:
-/// CREATE/ALTER USER.
-void wipePasswordFromQuery(ASTPtr ast);
-
-}
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 9bbccf5f582..d759c339dea 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1111,6 +1111,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
}
else
{
+ /// Supported signatures:
+ ///
+ /// S3('url')
+ /// S3('url', 'format')
+ /// S3('url', 'format', 'compression')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
+ /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
+
if (engine_args.empty() || engine_args.size() > 5)
throw Exception(
"Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp
index 65e9bb1ab8c..68e15d10f52 100644
--- a/src/Storages/StorageS3Settings.cpp
+++ b/src/Storages/StorageS3Settings.cpp
@@ -5,13 +5,23 @@
#include
#include
#include
-
+#include
#include
namespace DB
{
+namespace
+{
+ /// An object up to 5 GB can be copied in a single atomic operation.
+ constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB;
+
+ /// The maximum size of an uploaded part.
+ constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB;
+}
+
+
void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings)
{
std::lock_guard lock(mutex);
@@ -50,9 +60,11 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
S3Settings::ReadWriteSettings rw_settings;
rw_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries);
rw_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size);
+ rw_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE);
rw_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor);
rw_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold);
rw_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size);
+ rw_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
rw_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections);
rw_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false);
@@ -95,12 +107,16 @@ void S3Settings::ReadWriteSettings::updateFromSettingsIfEmpty(const Settings & s
max_single_read_retries = settings.s3_max_single_read_retries;
if (!min_upload_part_size)
min_upload_part_size = settings.s3_min_upload_part_size;
+ if (!max_upload_part_size)
+ max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE;
if (!upload_part_size_multiply_factor)
upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor;
if (!upload_part_size_multiply_parts_count_threshold)
upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold;
if (!max_single_part_upload_size)
max_single_part_upload_size = settings.s3_max_single_part_upload_size;
+ if (!max_single_operation_copy_size)
+ max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE;
if (!max_connections)
max_connections = settings.s3_max_connections;
if (!max_unexpected_write_error_retries)
diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h
index 2da4a1d7590..bd90ba569d8 100644
--- a/src/Storages/StorageS3Settings.h
+++ b/src/Storages/StorageS3Settings.h
@@ -27,9 +27,11 @@ struct S3Settings
{
size_t max_single_read_retries = 0;
size_t min_upload_part_size = 0;
+ size_t max_upload_part_size = 0;
size_t upload_part_size_multiply_factor = 0;
size_t upload_part_size_multiply_parts_count_threshold = 0;
size_t max_single_part_upload_size = 0;
+ size_t max_single_operation_copy_size = 0;
size_t max_connections = 0;
bool check_objects_after_upload = false;
size_t max_unexpected_write_error_retries = 0;
@@ -41,9 +43,11 @@ struct S3Settings
{
return max_single_read_retries == other.max_single_read_retries
&& min_upload_part_size == other.min_upload_part_size
+ && max_upload_part_size == other.max_upload_part_size
&& upload_part_size_multiply_factor == other.upload_part_size_multiply_factor
&& upload_part_size_multiply_parts_count_threshold == other.upload_part_size_multiply_parts_count_threshold
&& max_single_part_upload_size == other.max_single_part_upload_size
+ && max_single_operation_copy_size == other.max_single_operation_copy_size
&& max_connections == other.max_connections
&& check_objects_after_upload == other.check_objects_after_upload
&& max_unexpected_write_error_retries == other.max_unexpected_write_error_retries;
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index c38b4313359..0f01dc4288c 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -280,7 +280,7 @@ namespace
timeouts,
credentials,
settings.max_http_get_redirects,
- DBMS_DEFAULT_BUFFER_SIZE,
+ settings.max_read_buffer_size,
read_settings,
headers,
ReadWriteBufferFromHTTP::Range{0, std::nullopt},
@@ -341,7 +341,7 @@ namespace
timeouts,
credentials,
settings.max_http_get_redirects,
- DBMS_DEFAULT_BUFFER_SIZE,
+ settings.max_read_buffer_size,
read_settings,
headers,
&context->getRemoteHostFilter(),
@@ -378,7 +378,7 @@ namespace
timeouts,
credentials,
settings.max_http_get_redirects,
- DBMS_DEFAULT_BUFFER_SIZE,
+ settings.max_read_buffer_size,
read_settings,
headers,
ReadWriteBufferFromHTTP::Range{},
@@ -863,6 +863,8 @@ std::optional IStorageURLBase::getLastModificationTime(
const Poco::Net::HTTPBasicCredentials & credentials,
const ContextPtr & context)
{
+ /// Bind by reference: a plain `auto` would deduce a value type and copy the whole settings object.
+ const auto & settings = context->getSettingsRef();
+
try
{
ReadWriteBufferFromHTTP buf(
@@ -871,8 +873,8 @@ std::optional IStorageURLBase::getLastModificationTime(
{},
ConnectionTimeouts::getHTTPTimeouts(context),
credentials,
- context->getSettingsRef().max_http_get_redirects,
- DBMS_DEFAULT_BUFFER_SIZE,
+ settings.max_http_get_redirects,
+ settings.max_read_buffer_size,
context->getReadSettings(),
headers,
ReadWriteBufferFromHTTP::Range{},
diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp
index 097a239ccae..2c62e29810f 100644
--- a/src/TableFunctions/TableFunctionRemote.cpp
+++ b/src/TableFunctions/TableFunctionRemote.cpp
@@ -94,6 +94,30 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
}
else
{
+ /// Supported signatures:
+ ///
+ /// remote('addresses_expr', db.table)
+ /// remote('addresses_expr', 'db', 'table')
+ /// remote('addresses_expr', db.table, 'user')
+ /// remote('addresses_expr', 'db', 'table', 'user')
+ /// remote('addresses_expr', db.table, 'user', 'password')
+ /// remote('addresses_expr', 'db', 'table', 'user', 'password')
+ /// remote('addresses_expr', db.table, sharding_key)
+ /// remote('addresses_expr', 'db', 'table', sharding_key)
+ /// remote('addresses_expr', db.table, 'user', sharding_key)
+ /// remote('addresses_expr', 'db', 'table', 'user', sharding_key)
+ /// remote('addresses_expr', db.table, 'user', 'password', sharding_key)
+ /// remote('addresses_expr', 'db', 'table', 'user', 'password', sharding_key)
+ ///
+ /// remoteSecure() - same as remote()
+ ///
+ /// cluster('cluster_name', db.table)
+ /// cluster('cluster_name', 'db', 'table')
+ /// cluster('cluster_name', db.table, sharding_key)
+ /// cluster('cluster_name', 'db', 'table', sharding_key)
+ ///
+ /// clusterAllReplicas() - same as cluster()
+
if (args.size() < 2 || args.size() > max_args)
throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
@@ -318,7 +342,6 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_
is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]");
}
-
void registerTableFunctionRemote(TableFunctionFactory & factory)
{
factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared("remote"); });
diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py
index 8f94ef4a915..04dbe78adc4 100644
--- a/tests/ci/ast_fuzzer_check.py
+++ b/tests/ci/ast_fuzzer_check.py
@@ -69,7 +69,7 @@ if __name__ == "__main__":
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
- docker_image = get_image_with_version(temp_path, IMAGE_NAME)
+ docker_image = get_image_with_version(reports_path, IMAGE_NAME)
build_name = get_build_name_for_check(check_name)
print(build_name)
diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt
index c0dcf4a4dde..e607f1a9f39 100644
--- a/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt
+++ b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt
@@ -1,3 +1,3 @@
requests
PyJWT
-cryptography
+cryptography==37.0.4
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 5e69046915e..93322b69669 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -355,6 +355,12 @@ CI_CONFIG = {
"required_build": "package_aarch64",
"test_grep_exclude_filter": "",
},
+ "SQLancer (release)": {
+ "required_build": "package_release",
+ },
+ "SQLancer (debug)": {
+ "required_build": "package_debug",
+ },
},
} # type: dict
diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt
index c0dcf4a4dde..e607f1a9f39 100644
--- a/tests/ci/metrics_lambda/requirements.txt
+++ b/tests/ci/metrics_lambda/requirements.txt
@@ -1,3 +1,3 @@
requests
PyJWT
-cryptography
+cryptography==37.0.4
diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py
index dc016a7eed9..5f725a61b3e 100644
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@@ -87,7 +87,7 @@ class PRInfo:
self.body = ""
self.diff_urls = []
self.release_pr = 0
- ref = github_event.get("ref", "refs/head/master")
+ ref = github_event.get("ref", "refs/heads/master")
if ref and ref.startswith("refs/heads/"):
ref = ref[11:]
diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py
new file mode 100644
index 00000000000..51c95e50746
--- /dev/null
+++ b/tests/ci/sqlancer_check.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+
+import logging
+import subprocess
+import os
+import sys
+
+from github import Github
+
+from env_helper import (
+ GITHUB_REPOSITORY,
+ GITHUB_RUN_URL,
+ REPORTS_PATH,
+ REPO_COPY,
+ TEMP_PATH,
+)
+from s3_helper import S3Helper
+from get_robot_token import get_best_robot_token
+from pr_info import PRInfo
+from build_download_helper import get_build_name_for_check, read_build_urls
+from docker_pull_helper import get_image_with_version
+from commit_status_helper import post_commit_status
+from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
+from upload_result_helper import upload_results
+from stopwatch import Stopwatch
+from rerun_helper import RerunHelper
+
+IMAGE_NAME = "clickhouse/sqlancer-test"
+
+
+def get_run_command(download_url, workspace_path, image):
+    """Build the shell command line that starts the SQLancer container.
+
+    download_url is passed to the container as BINARY_URL_TO_DOWNLOAD,
+    workspace_path is mounted at /workspace, and image is the docker image to run.
+    """
+    command_parts = [
+        "docker run",
+        # For sysctl
+        "--privileged",
+        "--network=host",
+        f"--volume={workspace_path}:/workspace",
+        "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE",
+        f'-e BINARY_URL_TO_DOWNLOAD="{download_url}"',
+        f"{image}",
+    ]
+    return " ".join(command_parts)
+
+
+def get_commit(gh, commit_sha):
+    """Look up commit_sha in the GITHUB_REPOSITORY repository through the gh client."""
+    return gh.get_repo(GITHUB_REPOSITORY).get_commit(commit_sha)
+
+
+if __name__ == "__main__":
+    # Entry point of the SQLancer CI check. Usage: sqlancer_check.py <check name>
+    # Finds the ClickHouse binary built for this check, runs the SQLancer docker
+    # image against it, uploads logs and per-test results, posts a commit status
+    # and stores the results via ClickHouseHelper.
+    logging.basicConfig(level=logging.INFO)
+
+    stopwatch = Stopwatch()
+
+    temp_path = TEMP_PATH
+    repo_path = REPO_COPY
+    reports_path = REPORTS_PATH
+
+    check_name = sys.argv[1]
+
+    os.makedirs(temp_path, exist_ok=True)
+
+    pr_info = PRInfo()
+
+    gh = Github(get_best_robot_token(), per_page=100)
+
+    # Do not repeat the work if a status for this check has already been posted.
+    rerun_helper = RerunHelper(gh, pr_info, check_name)
+    if rerun_helper.is_already_finished_by_status():
+        logging.info("Check is already finished according to github status, exiting")
+        sys.exit(0)
+
+    docker_image = get_image_with_version(reports_path, IMAGE_NAME)
+
+    build_name = get_build_name_for_check(check_name)
+    print(build_name)
+    urls = read_build_urls(build_name, reports_path)
+    if not urls:
+        raise Exception("No build URLs found")
+
+    # Pick the plain server binary among all build artifacts.
+    for url in urls:
+        if url.endswith("/clickhouse"):
+            build_url = url
+            break
+    else:
+        raise Exception("Cannot find binary clickhouse among build results")
+
+    logging.info("Got build url %s", build_url)
+
+    workspace_path = os.path.join(temp_path, "workspace")
+    os.makedirs(workspace_path, exist_ok=True)
+
+    run_command = get_run_command(build_url, workspace_path, docker_image)
+    logging.info("Going to run %s", run_command)
+
+    # Both stdout and stderr of the container go to runlog.log in the workspace.
+    run_log_path = os.path.join(workspace_path, "runlog.log")
+    with open(run_log_path, "w", encoding="utf-8") as log:
+        with subprocess.Popen(
+            run_command, shell=True, stderr=log, stdout=log
+        ) as process:
+            retcode = process.wait()
+            if retcode == 0:
+                logging.info("Run successfully")
+            else:
+                logging.info("Run failed")
+
+    # Take ownership of everything under temp_path, including the mounted workspace.
+    subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+
+    # Names of the SQLancer test runs; <name>.err and <name>.out of each one are
+    # added to the list of uploaded logs below.
+    tests = [
+        "TLPGroupBy",
+        "TLPHaving",
+        "TLPWhere",
+        "TLPDistinct",
+        "TLPAggregate",
+        "NoREC",
+    ]
+
+    paths = [
+        run_log_path,
+        os.path.join(workspace_path, "clickhouse-server.log"),
+        os.path.join(workspace_path, "stderr.log"),
+        os.path.join(workspace_path, "stdout.log"),
+    ]
+    for t in tests:
+        err_name = f"{t}.err"
+        log_name = f"{t}.out"
+        paths.append(os.path.join(workspace_path, err_name))
+        paths.append(os.path.join(workspace_path, log_name))
+
+    s3_helper = S3Helper()
+    report_url = GITHUB_RUN_URL
+
+    # The commit status is never downgraded here: failures are only visible in
+    # the description and in the per-test results.
+    status = "success"
+    test_results = []
+    # Try to get the results and the status message saved by the SQLancer run
+    try:
+        if os.path.exists(os.path.join(workspace_path, "server_crashed.log")):
+            # list.append takes a single argument, so the result must be one tuple.
+            test_results.append(("Server crashed", "FAIL"))
+        with open(
+            os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8"
+        ) as summary_f:
+            for line in summary_f:
+                # Strip the line terminator so it does not end up in the status field.
+                fields = line.rstrip("\n").split("\t")
+                test_results.append((fields[0], fields[1]))
+
+        with open(
+            os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
+        ) as desc_f:
+            description = desc_f.readline().rstrip("\n")[:140]
+    except Exception:
+        # Best effort: missing or malformed result files must not abort the
+        # report, but the reason is logged instead of being silently dropped.
+        logging.warning("Cannot read SQLancer results", exc_info=True)
+        description = "Task failed: $?=" + str(retcode)
+
+    report_url = upload_results(
+        s3_helper,
+        pr_info.number,
+        pr_info.sha,
+        test_results,
+        paths,
+        check_name,
+        False,
+    )
+
+    # Post the status before storing the results, so it is set even if the
+    # insertion below fails.
+    post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
+
+    print(f"::notice:: {check_name} Report url: {report_url}")
+
+    ch_helper = ClickHouseHelper()
+
+    prepared_events = prepare_tests_results_for_clickhouse(
+        pr_info,
+        test_results,
+        status,
+        stopwatch.duration_seconds,
+        stopwatch.start_time_str,
+        report_url,
+        check_name,
+    )
+
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    print(f"::notice:: Result: '{status}', '{description}', '{report_url}'")
diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt
index c0dcf4a4dde..e607f1a9f39 100644
--- a/tests/ci/termination_lambda/requirements.txt
+++ b/tests/ci/termination_lambda/requirements.txt
@@ -1,3 +1,3 @@
requests
PyJWT
-cryptography
+cryptography==37.0.4
diff --git a/tests/ci/token_lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt
index c0dcf4a4dde..e607f1a9f39 100644
--- a/tests/ci/token_lambda/requirements.txt
+++ b/tests/ci/token_lambda/requirements.txt
@@ -1,3 +1,3 @@
requests
PyJWT
-cryptography
+cryptography==37.0.4
diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py
index 0fde4408176..e145df02f80 100644
--- a/tests/ci/upload_result_helper.py
+++ b/tests/ci/upload_result_helper.py
@@ -14,6 +14,8 @@ from report import ReportColorTheme, create_test_html_report
def process_logs(
s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs
):
+ logging.info("Upload files to s3 %s", additional_logs)
+
processed_logs = {}
# Firstly convert paths of logs from test_results to urls to s3.
for test_result in test_results:
diff --git a/tests/ci/workflow_approve_rerun_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt
index c0dcf4a4dde..e607f1a9f39 100644
--- a/tests/ci/workflow_approve_rerun_lambda/requirements.txt
+++ b/tests/ci/workflow_approve_rerun_lambda/requirements.txt
@@ -1,3 +1,3 @@
requests
PyJWT
-cryptography
+cryptography==37.0.4
diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict
index a147878da9b..7977cb9ed21 100644
--- a/tests/fuzz/all.dict
+++ b/tests/fuzz/all.dict
@@ -447,6 +447,7 @@
"FORMAT"
"formatDateTime"
"formatReadableQuantity"
+"formatReadableDecimalSize"
"formatReadableSize"
"formatReadableTimeDelta"
"formatRow"
diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict
index b90697f0c3d..e4f347babf8 100644
--- a/tests/fuzz/dictionaries/functions.dict
+++ b/tests/fuzz/dictionaries/functions.dict
@@ -399,6 +399,7 @@
"demangle"
"toNullable"
"concat"
+"formatReadableDecimalSize"
"formatReadableSize"
"shardCount"
"fromModifiedJulianDayOrNull"
diff --git a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml
new file mode 100644
index 00000000000..2aef4db55c8
--- /dev/null
+++ b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml
@@ -0,0 +1,12 @@
+
+
+
+ http://minio1:9001/root/data/backups/multipart_upload_copy/
+
+ 1
+ 5242880
+ 3
+ 2
+
+
+
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py
index 617c14d6736..7ddb1459ab9 100644
--- a/tests/integration/test_backup_restore_s3/test.py
+++ b/tests/integration/test_backup_restore_s3/test.py
@@ -4,7 +4,11 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
- main_configs=["configs/disk_s3.xml", "configs/named_collection_s3_backups.xml"],
+ main_configs=[
+ "configs/disk_s3.xml",
+ "configs/named_collection_s3_backups.xml",
+ "configs/s3_settings.xml",
+ ],
with_minio=True,
)
@@ -27,17 +31,17 @@ def new_backup_name():
return f"backup{backup_id_counter}"
-def check_backup_and_restore(storage_policy, backup_destination):
+def check_backup_and_restore(storage_policy, backup_destination, size=1000):
node.query(
f"""
DROP TABLE IF EXISTS data NO DELAY;
CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}';
- INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT 1000;
+ INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size};
BACKUP TABLE data TO {backup_destination};
RESTORE TABLE data AS data_restored FROM {backup_destination};
SELECT throwIf(
- (SELECT groupArray(tuple(*)) FROM data) !=
- (SELECT groupArray(tuple(*)) FROM data_restored),
+ (SELECT count(), sum(sipHash64(*)) FROM data) !=
+ (SELECT count(), sum(sipHash64(*)) FROM data_restored),
'Data does not matched after BACKUP/RESTORE'
);
DROP TABLE data NO DELAY;
@@ -106,9 +110,10 @@ def test_backup_to_s3_native_copy():
)
check_backup_and_restore(storage_policy, backup_destination)
assert node.contains_in_log("using native copy")
+ assert node.contains_in_log("single-operation copy")
-def test_backup_to_s3_other_bucket_native_copy():
+def test_backup_to_s3_native_copy_other_bucket():
storage_policy = "policy_s3_other_bucket"
backup_name = new_backup_name()
backup_destination = (
@@ -116,3 +121,13 @@ def test_backup_to_s3_other_bucket_native_copy():
)
check_backup_and_restore(storage_policy, backup_destination)
assert node.contains_in_log("using native copy")
+ assert node.contains_in_log("single-operation copy")
+
+
+def test_backup_to_s3_native_copy_multipart_upload():
+    """Back up and restore a larger table (size=1000000) under policy_s3.
+
+    Afterwards the server log must mention both "using native copy" and
+    "multipart upload copy".
+    """
+    storage_policy = "policy_s3"
+    backup_name = new_backup_name()
+    backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart_upload_copy/{backup_name}', 'minio', 'minio123')"
+    check_backup_and_restore(storage_policy, backup_destination, size=1000000)
+    assert node.contains_in_log("using native copy")
+    assert node.contains_in_log("multipart upload copy")
diff --git a/tests/integration/test_mask_queries_in_logs/__init__.py b/tests/integration/test_failed_async_inserts/__init__.py
similarity index 100%
rename from tests/integration/test_mask_queries_in_logs/__init__.py
rename to tests/integration/test_failed_async_inserts/__init__.py
diff --git a/tests/integration/test_failed_async_inserts/configs/config.xml b/tests/integration/test_failed_async_inserts/configs/config.xml
new file mode 100644
index 00000000000..038c0792b44
--- /dev/null
+++ b/tests/integration/test_failed_async_inserts/configs/config.xml
@@ -0,0 +1,3 @@
+
+ 1000
+
diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py
new file mode 100644
index 00000000000..6d66ac97006
--- /dev/null
+++ b/tests/integration/test_failed_async_inserts/test.py
@@ -0,0 +1,54 @@
+import logging
+from time import sleep
+
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance(
+ "node", main_configs=["configs/config.xml"], with_zookeeper=True
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():  # module-scoped fixture: the cluster is started once per test module
+ try:
+ cluster.start()
+ yield cluster  # tests that request this fixture receive the cluster object
+ finally:
+ cluster.shutdown()  # runs on fixture teardown and also if cluster.start() raises
+
+
+def test_failed_async_inserts(started_cluster):
+    """Check that failed async INSERTs are counted in system.events.
+
+    Four INSERT statements are sent with async_insert = 1 and ignore_error=True;
+    the FailedAsyncInsertQuery event value read afterwards must be "4".
+    """
+    instance = started_cluster.instances["node"]
+
+    instance.query(
+        "CREATE TABLE async_insert_30_10_2022 (id UInt32, s String) ENGINE = Memory"
+    )
+
+    # Errors reported to the client are ignored on purpose:
+    # only the counter read from system.events is asserted below.
+    failing_inserts = (
+        "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES ()",
+        "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES ([1,2,3], 1)",
+        'INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 FORMAT JSONEachRow {"id" : 1} {"x"}',
+        "INSERT INTO async_insert_30_10_2022 SETTINGS async_insert = 1 VALUES (throwIf(4),'')",
+    )
+    for statement in failing_inserts:
+        instance.query(statement, ignore_error=True)
+
+    failed_count = instance.query(
+        "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery'"
+    )
+    assert failed_count == "4\n"
+
+    # Remove the table created above.
+    instance.query("DROP TABLE IF EXISTS async_insert_30_10_2022 NO DELAY")
diff --git a/tests/integration/test_mask_queries_in_logs/test.py b/tests/integration/test_mask_queries_in_logs/test.py
deleted file mode 100644
index 4a4d3ee4ed0..00000000000
--- a/tests/integration/test_mask_queries_in_logs/test.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node")
-
-
-@pytest.fixture(scope="module", autouse=True)
-def started_cluster():
- try:
- cluster.start()
- yield cluster
-
- finally:
- cluster.shutdown()
-
-
-def check_logs(must_contain, must_not_contain):
- node.query("SYSTEM FLUSH LOGS")
-
- for str in must_contain:
- assert node.contains_in_log(str)
- assert (
- int(
- node.query(
- f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
- ).strip()
- )
- >= 1
- )
-
- for str in must_not_contain:
- assert not node.contains_in_log(str)
- assert (
- int(
- node.query(
- f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
- ).strip()
- )
- == 0
- )
-
-
-# Passwords in CREATE/ALTER queries must be hidden in logs.
-def test_create_alter_user():
- node.query("CREATE USER u1 IDENTIFIED BY 'qwe123' SETTINGS custom_a = 'a'")
- node.query("ALTER USER u1 IDENTIFIED BY '123qwe' SETTINGS custom_b = 'b'")
- node.query(
- "CREATE USER u2 IDENTIFIED WITH plaintext_password BY 'plainpasswd' SETTINGS custom_c = 'c'"
- )
-
- assert (
- node.query("SHOW CREATE USER u1")
- == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
- )
- assert (
- node.query("SHOW CREATE USER u2")
- == "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
- )
-
- check_logs(
- must_contain=[
- "CREATE USER u1 IDENTIFIED WITH sha256_password",
- "ALTER USER u1 IDENTIFIED WITH sha256_password",
- "CREATE USER u2 IDENTIFIED WITH plaintext_password",
- ],
- must_not_contain=[
- "qwe123",
- "123qwe",
- "plainpasswd",
- "IDENTIFIED WITH sha256_password BY",
- "IDENTIFIED WITH sha256_hash BY",
- "IDENTIFIED WITH plaintext_password BY",
- ],
- )
diff --git a/tests/integration/test_mask_sensitive_info_in_logs/__init__.py b/tests/integration/test_mask_sensitive_info_in_logs/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_mask_sensitive_info_in_logs/test.py b/tests/integration/test_mask_sensitive_info_in_logs/test.py
new file mode 100644
index 00000000000..48f11fbf7a1
--- /dev/null
+++ b/tests/integration/test_mask_sensitive_info_in_logs/test.py
@@ -0,0 +1,340 @@
+import pytest
+import random, string
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node", with_zookeeper=True)
+
+
+@pytest.fixture(scope="module", autouse=True)
+def started_cluster():  # autouse: applied to every test in this module without being requested
+ try:
+ cluster.start()
+ yield cluster
+
+ finally:
+ cluster.shutdown()  # runs on fixture teardown and also if cluster.start() raises
+
+
+def check_logs(must_contain=(), must_not_contain=()):
+    """Flush logs, then assert each pattern is present (or absent) in the server log and in system.query_log."""
+    node.query("SYSTEM FLUSH LOGS")
+
+    def for_log(text):  # backslash-escape backticks and square brackets before the server-log search
+        return text.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]")
+
+    def for_like(text):  # escape single quotes: the text is embedded in a quoted LIKE literal
+        return text.replace("'", "\\'")
+
+    for pattern in must_contain:
+        assert node.contains_in_log(for_log(pattern))
+    for pattern in must_not_contain:
+        assert not node.contains_in_log(for_log(pattern))
+    for pattern in must_contain:
+        assert system_query_log_contains_search_pattern(for_like(pattern))
+    for pattern in must_not_contain:
+        assert not system_query_log_contains_search_pattern(for_like(pattern))
+
+
+# Returns true if "system.query_log" has a query matching a specified pattern.
+def system_query_log_contains_search_pattern(search_pattern):
+    # search_pattern is interpolated verbatim between the '%' wildcards of a LIKE literal,
+    # so any single quotes in it must already be escaped by the caller.
+    count_query = (
+        f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'"
+    )
+
+    matching_rows = int(node.query(count_query).strip())
+    return matching_rows >= 1
+
+
+# Generates a random string.
+def new_password(len=16):
+    # `len` shadows the builtin inside this function; the parameter name is kept for keyword callers.
+    alphabet = string.ascii_uppercase + string.digits
+    return "".join(random.choice(alphabet) for _ in range(len))
+
+
+# Passwords in CREATE/ALTER queries must be hidden in logs.
+def test_create_alter_user():
+ password = new_password()
+ # u1 is created and then altered with the doubled password; u2 uses an explicit plaintext_password clause.
+ node.query(f"CREATE USER u1 IDENTIFIED BY '{password}' SETTINGS custom_a = 'a'")
+ node.query(
+ f"ALTER USER u1 IDENTIFIED BY '{password}{password}' SETTINGS custom_b = 'b'"
+ )
+ node.query(
+ f"CREATE USER u2 IDENTIFIED WITH plaintext_password BY '{password}' SETTINGS custom_c = 'c'"
+ )
+ # SHOW CREATE USER is expected to report the authentication type without any password text.
+ assert (
+ node.query("SHOW CREATE USER u1")
+ == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
+ )
+ assert (
+ node.query("SHOW CREATE USER u2")
+ == "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
+ )
+ # The masked statements must be logged; the generated password and any "... BY" form must not be.
+ check_logs(
+ must_contain=[
+ "CREATE USER u1 IDENTIFIED WITH sha256_password",
+ "ALTER USER u1 IDENTIFIED WITH sha256_password",
+ "CREATE USER u2 IDENTIFIED WITH plaintext_password",
+ ],
+ must_not_contain=[
+ password,
+ "IDENTIFIED WITH sha256_password BY",
+ "IDENTIFIED WITH sha256_hash BY",
+ "IDENTIFIED WITH plaintext_password BY",
+ ],
+ )
+ # Remove the users created by this test.
+ node.query("DROP USER u1, u2")
+
+
+def test_create_table():
+ password = new_password()
+ # Engine clauses under test; the three S3 variants without credentials carry no password at all.
+ table_engines = [
+ f"MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
+ f"PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
+ f"MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}')",
+ f"S3('http://minio1:9001/root/data/test1.csv')",
+ f"S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
+ f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
+ f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')",
+ f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')",
+ ]
+ # Tables are named table0..table7 after the index of their engine clause.
+ for i, table_engine in enumerate(table_engines):
+ node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}")
+ # Entries expected with '[HIDDEN]' also show the column as `x`; the credential-free S3 entries are expected as written.
+ check_logs(
+ must_contain=[
+ "CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
+ "CREATE TABLE table1 (`x` int) ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
+ "CREATE TABLE table2 (`x` int) ENGINE = MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]')",
+ "CREATE TABLE table3 (x int) ENGINE = S3('http://minio1:9001/root/data/test1.csv')",
+ "CREATE TABLE table4 (x int) ENGINE = S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
+ "CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
+ "CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')",
+ "CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')",
+ ],
+ must_not_contain=[password],
+ )
+
+ for i in range(0, len(table_engines)):
+ node.query(f"DROP TABLE table{i}")
+
+
+def test_create_database():
+ password = new_password()
+ # Only the MySQL engine is exercised; the PostgreSQL entries here and in must_contain are commented out.
+ database_engines = [
+ f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1",
+ # f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')",
+ ]
+
+ for i, database_engine in enumerate(database_engines):
+ # query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to MySQL server".
+ # We test logging here and not actual work with MySQL server.
+ node.query_and_get_answer_with_error(
+ f"CREATE DATABASE database{i} ENGINE = {database_engine}"
+ )
+
+ check_logs(
+ must_contain=[
+ "CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')",
+ # "CREATE DATABASE database1 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')",
+ ],
+ must_not_contain=[password],
+ )
+
+ for i in range(0, len(database_engines)):
+ node.query(f"DROP DATABASE IF EXISTS database{i}")
+
+
+def test_table_functions():
+ password = new_password()
+ # Calls with and without a password argument; in the f-strings '{{2..11}}' yields the literal text '{2..11}'.
+ table_functions = [
+ f"mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
+ f"postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
+ f"mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}', 'x int')",
+ f"s3('http://minio1:9001/root/data/test1.csv')",
+ f"s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
+ f"s3('http://minio1:9001/root/data/test3.csv', 'minio', '{password}')",
+ f"s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
+ f"s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
+ f"s3('http://minio1:9001/root/data/test6.csv', 'minio', '{password}', 'CSV')",
+ f"s3('http://minio1:9001/root/data/test7.csv', 'minio', '{password}', 'CSV', 'x int')",
+ f"s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '{password}', 'CSV', 'x int', 'gzip')",
+ f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '{password}')",
+ f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
+ f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '{password}', 'CSV')",
+ f"remote('127.{{2..11}}', default.remote_table)",
+ f"remote('127.{{2..11}}', default.remote_table, rand())",
+ f"remote('127.{{2..11}}', default.remote_table, 'remote_user')",
+ f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}')",
+ f"remote('127.{{2..11}}', default.remote_table, 'remote_user', rand())",
+ f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}', rand())",
+ f"remote('127.{{2..11}}', 'default.remote_table', 'remote_user', '{password}', rand())",
+ f"remote('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}', rand())",
+ f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())",
+ f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')",
+ f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())",
+ ]
+
+ for i, table_function in enumerate(table_functions):
+ node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}")
+ # Expected log text per tablefunc index: calls that carried the password show '[HIDDEN]' and `x`; the rest are expected as written.
+ check_logs(
+ must_contain=[
+ "CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
+ "CREATE TABLE tablefunc1 (`x` int) AS postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
+ "CREATE TABLE tablefunc2 (`x` int) AS mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]', 'x int')",
+ "CREATE TABLE tablefunc3 (x int) AS s3('http://minio1:9001/root/data/test1.csv')",
+ "CREATE TABLE tablefunc4 (x int) AS s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
+ "CREATE TABLE tablefunc5 (`x` int) AS s3('http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]')",
+ "CREATE TABLE tablefunc6 (x int) AS s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
+ "CREATE TABLE tablefunc7 (x int) AS s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
+ "CREATE TABLE tablefunc8 (`x` int) AS s3('http://minio1:9001/root/data/test6.csv', 'minio', '[HIDDEN]', 'CSV')",
+ "CREATE TABLE tablefunc9 (`x` int) AS s3('http://minio1:9001/root/data/test7.csv', 'minio', '[HIDDEN]', 'CSV', 'x int')",
+ "CREATE TABLE tablefunc10 (`x` int) AS s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'x int', 'gzip')",
+ "CREATE TABLE tablefunc11 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '[HIDDEN]')",
+ "CREATE TABLE tablefunc12 (x int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
+ "CREATE TABLE tablefunc13 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]', 'CSV')",
+ "CREATE TABLE tablefunc14 (x int) AS remote('127.{2..11}', default.remote_table)",
+ "CREATE TABLE tablefunc15 (x int) AS remote('127.{2..11}', default.remote_table, rand())",
+ "CREATE TABLE tablefunc16 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user')",
+ "CREATE TABLE tablefunc17 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]')",
+ "CREATE TABLE tablefunc18 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user', rand())",
+ "CREATE TABLE tablefunc19 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]', rand())",
+ "CREATE TABLE tablefunc20 (`x` int) AS remote('127.{2..11}', 'default.remote_table', 'remote_user', '[HIDDEN]', rand())",
+ "CREATE TABLE tablefunc21 (`x` int) AS remote('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]', rand())",
+ "CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())",
+ "CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')",
+ "CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())",
+ ],
+ must_not_contain=[password],
+ )
+
+ for i in range(0, len(table_functions)):
+ node.query(f"DROP TABLE tablefunc{i}")
+
+
+def test_encryption_functions():
+ plaintext = new_password()
+ cipher = new_password()
+ key = new_password(32)
+ iv8 = new_password(8)
+ iv16 = new_password(16)
+ add = new_password()
+ # Every value generated above is listed in must_not_contain below, i.e. none may appear in the logs.
+ encryption_functions = [
+ f"encrypt('aes-256-ofb', '{plaintext}', '{key}')",
+ f"encrypt('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
+ f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}')",
+ f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}', '{add}')",
+ f"decrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
+ f"aes_encrypt_mysql('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
+ f"aes_decrypt_mysql('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
+ f"tryDecrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
+ ]
+
+ for encryption_function in encryption_functions:
+ node.query(f"SELECT {encryption_function}")
+ # Expected log form: only the mode argument stays visible; all following arguments appear as a single '[HIDDEN]'.
+ check_logs(
+ must_contain=[
+ "SELECT encrypt('aes-256-ofb', '[HIDDEN]')",
+ "SELECT encrypt('aes-256-gcm', '[HIDDEN]')",
+ "SELECT decrypt('aes-256-ofb', '[HIDDEN]')",
+ "SELECT aes_encrypt_mysql('aes-256-ofb', '[HIDDEN]')",
+ "SELECT aes_decrypt_mysql('aes-256-ofb', '[HIDDEN]')",
+ "SELECT tryDecrypt('aes-256-ofb', '[HIDDEN]')",
+ ],
+ must_not_contain=[plaintext, cipher, key, iv8, iv16, add],
+ )
+
+
+def test_create_dictionary():
+    """The PASSWORD of a dictionary's CLICKHOUSE source must be logged as '[HIDDEN]' and never in clear text."""
+    secret = new_password()
+    source_template = "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '{}' DB 'default')) "
+    tail = "LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())"
+    create_query = (
+        "CREATE DICTIONARY dict1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n "
+        + source_template.format(secret)
+        + tail
+    )
+    node.query(create_query)
+
+    expected_log = (
+        "CREATE DICTIONARY dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n "
+        + source_template.format("[HIDDEN]")
+        + tail
+    )
+    check_logs(must_contain=[expected_log], must_not_contain=[secret])
+    node.query("DROP DICTIONARY dict1")
+
+
+def test_backup_to_s3():
+    """Check masking of the S3 secret in BACKUP and RESTORE statements.
+
+    Both statements are expected in the logs with '[HIDDEN]' in place of the secret.
+    """
+    node.query("CREATE TABLE temptbl (x int) ENGINE=Log")
+    secret = new_password()
+
+    backup_stmt = "BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{}')"
+    restore_stmt = "RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{}')"
+    statements = (backup_stmt, restore_stmt)
+
+    # query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to AWS".
+    # We test logging here and not actual work with AWS server.
+    for statement in statements:
+        node.query_and_get_answer_with_error(statement.format(secret))
+
+    check_logs(
+        must_contain=[statement.format("[HIDDEN]") for statement in statements],
+        must_not_contain=[secret],
+    )
+
+    for leftover in ("temptbl", "temptbl2"):
+        node.query(f"DROP TABLE IF EXISTS {leftover}")
+
+
+def test_on_cluster():
+    """The MySQL password of an ON CLUSTER CREATE TABLE must be masked in the query log and in the DDLWorker / executeQuery log lines."""
+
+    secret = new_password()
+
+    node.query(
+        f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{secret}')"
+    )
+
+    check_logs(
+        must_contain=[
+            "CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
+        ],
+        must_not_contain=[secret],
+    )
+
+    # Check logs of DDLWorker during executing of this query.
+    rewritten_query = ".*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
+    log_prefixes = (
+        "DDLWorker: Processing task ",
+        "DDLWorker: Executing query: ",
+        "executeQuery: ",
+        "DDLWorker: Executed query: ",
+    )
+    for log_prefix in log_prefixes:
+        assert node.contains_in_log(log_prefix + rewritten_query)
+
+    assert system_query_log_contains_search_pattern(
+        "%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')"
+    )
+
+    node.query("DROP TABLE table_oncl")
diff --git a/tests/queries/0_stateless/00232_format_readable_decimal_size.reference b/tests/queries/0_stateless/00232_format_readable_decimal_size.reference
new file mode 100644
index 00000000000..2f2a0f39bab
--- /dev/null
+++ b/tests/queries/0_stateless/00232_format_readable_decimal_size.reference
@@ -0,0 +1,70 @@
+1.00 B 1.00 B 1.00 B
+2.72 B 2.00 B 2.00 B
+7.39 B 7.00 B 7.00 B
+20.09 B 20.00 B 20.00 B
+54.60 B 54.00 B 54.00 B
+148.41 B 148.00 B 148.00 B
+403.43 B 403.00 B 403.00 B
+1.10 KB 1.10 KB 1.10 KB
+2.98 KB 2.98 KB 2.98 KB
+8.10 KB 8.10 KB 8.10 KB
+22.03 KB 22.03 KB 22.03 KB
+59.87 KB 59.87 KB 59.87 KB
+162.75 KB 162.75 KB 162.75 KB
+442.41 KB 442.41 KB 442.41 KB
+1.20 MB 1.20 MB 1.20 MB
+3.27 MB 3.27 MB 3.27 MB
+8.89 MB 8.89 MB 8.89 MB
+24.15 MB 24.15 MB 24.15 MB
+65.66 MB 65.66 MB 65.66 MB
+178.48 MB 178.48 MB 178.48 MB
+485.17 MB 485.17 MB 485.17 MB
+1.32 GB 1.32 GB 1.32 GB
+3.58 GB 3.58 GB 2.15 GB
+9.74 GB 9.74 GB 2.15 GB
+26.49 GB 26.49 GB 2.15 GB
+72.00 GB 72.00 GB 2.15 GB
+195.73 GB 195.73 GB 2.15 GB
+532.05 GB 532.05 GB 2.15 GB
+1.45 TB 1.45 TB 2.15 GB
+3.93 TB 3.93 TB 2.15 GB
+10.69 TB 10.69 TB 2.15 GB
+29.05 TB 29.05 TB 2.15 GB
+78.96 TB 78.96 TB 2.15 GB
+214.64 TB 214.64 TB 2.15 GB
+583.46 TB 583.46 TB 2.15 GB
+1.59 PB 1.59 PB 2.15 GB
+4.31 PB 4.31 PB 2.15 GB
+11.72 PB 11.72 PB 2.15 GB
+31.86 PB 31.86 PB 2.15 GB
+86.59 PB 86.59 PB 2.15 GB
+235.39 PB 235.39 PB 2.15 GB
+639.84 PB 639.84 PB 2.15 GB
+1.74 EB 1.74 EB 2.15 GB
+4.73 EB 4.73 EB 2.15 GB
+12.85 EB 12.85 EB 2.15 GB
+34.93 EB 18.45 EB 2.15 GB
+94.96 EB 18.45 EB 2.15 GB
+258.13 EB 18.45 EB 2.15 GB
+701.67 EB 18.45 EB 2.15 GB
+1.91 ZB 18.45 EB 2.15 GB
+5.18 ZB 18.45 EB 2.15 GB
+14.09 ZB 18.45 EB 2.15 GB
+38.31 ZB 18.45 EB 2.15 GB
+104.14 ZB 18.45 EB 2.15 GB
+283.08 ZB 18.45 EB 2.15 GB
+769.48 ZB 18.45 EB 2.15 GB
+2.09 YB 18.45 EB 2.15 GB
+5.69 YB 18.45 EB 2.15 GB
+15.46 YB 18.45 EB 2.15 GB
+42.01 YB 18.45 EB 2.15 GB
+114.20 YB 18.45 EB 2.15 GB
+310.43 YB 18.45 EB 2.15 GB
+843.84 YB 18.45 EB 2.15 GB
+2293.78 YB 18.45 EB 2.15 GB
+6235.15 YB 18.45 EB 2.15 GB
+16948.89 YB 18.45 EB 2.15 GB
+46071.87 YB 18.45 EB 2.15 GB
+125236.32 YB 18.45 EB 2.15 GB
+340427.60 YB 18.45 EB 2.15 GB
+925378.17 YB 18.45 EB 2.15 GB
diff --git a/tests/queries/0_stateless/00232_format_readable_decimal_size.sql b/tests/queries/0_stateless/00232_format_readable_decimal_size.sql
new file mode 100644
index 00000000000..f8e1409ae05
--- /dev/null
+++ b/tests/queries/0_stateless/00232_format_readable_decimal_size.sql
@@ -0,0 +1,4 @@
+WITH round(exp(number), 6) AS x, x > 0xFFFFFFFFFFFFFFFF ? 0xFFFFFFFFFFFFFFFF : toUInt64(x) AS y, x > 0x7FFFFFFF ? 0x7FFFFFFF : toInt32(x) AS z
+SELECT formatReadableDecimalSize(x), formatReadableDecimalSize(y), formatReadableDecimalSize(z)
+FROM system.numbers -- y caps x at 0xFFFFFFFFFFFFFFFF before toUInt64; z caps x at 0x7FFFFFFF before toInt32
+LIMIT 70;
diff --git a/tests/queries/0_stateless/00732_base64_functions.reference b/tests/queries/0_stateless/00732_base64_functions.reference
index b22ae4e7e24..5dc1ba03b89 100644
--- a/tests/queries/0_stateless/00732_base64_functions.reference
+++ b/tests/queries/0_stateless/00732_base64_functions.reference
@@ -14,3 +14,5 @@ fooba
foobar
1 1
+Zm9v
+foo
diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql
index 4ed86e20913..adba0cdebbd 100644
--- a/tests/queries/0_stateless/00732_base64_functions.sql
+++ b/tests/queries/0_stateless/00732_base64_functions.sql
@@ -14,3 +14,6 @@ SELECT base64Decode(val, 'excess argument') FROM (select arrayJoin(['', 'Zg==',
SELECT tryBase64Decode('Zm9vYmF=Zm9v', 'excess argument'); -- { serverError 42 }
SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError 117 }
+
+select base64Encode(toFixedString('foo', 3));
+select base64Decode(toFixedString('Zm9v', 4));
diff --git a/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.reference b/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.reference
index 8e1a7a2271f..97e573ab52d 100644
--- a/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.reference
+++ b/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.reference
@@ -1 +1,2 @@
+b
a b c
diff --git a/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.sql b/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.sql
index 979debbcbb8..f1387ebcdc7 100644
--- a/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.sql
+++ b/tests/queries/0_stateless/01839_join_to_subqueries_rewriter_columns_matcher.sql
@@ -1,3 +1,8 @@
+SELECT (if(a.test == 'a', b.test, c.test)) as `a.test` FROM
+ (SELECT 1 AS id, 'a' AS test) a
+ LEFT JOIN (SELECT 1 AS id, 'b' AS test) b ON b.id = a.id
+ LEFT JOIN (SELECT 1 AS id, 'c' AS test) c ON c.id = a.id;
+
SELECT COLUMNS('test') FROM
(SELECT 1 AS id, 'a' AS test) a
LEFT JOIN (SELECT 1 AS id, 'b' AS test) b ON b.id = a.id
diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python
index e0198210c16..e74d494edf5 100644
--- a/tests/queries/0_stateless/02233_HTTP_ranged.python
+++ b/tests/queries/0_stateless/02233_HTTP_ranged.python
@@ -120,8 +120,9 @@ class HttpProcessor(BaseHTTPRequestHandler):
allow_range = False
range_used = False
get_call_num = 0
+ responses_to_get = []
- def send_head(self):
+ def send_head(self, from_get = False):
if self.headers["Range"] and HttpProcessor.allow_range:
try:
self.range = parse_byte_range(self.headers["Range"])
@@ -145,7 +146,14 @@ class HttpProcessor(BaseHTTPRequestHandler):
self.send_error(416, "Requested Range Not Satisfiable")
return None
- self.send_response(206 if HttpProcessor.allow_range else 200)
+ retry_range_request = first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0
+ if retry_range_request:
+ code = HttpProcessor.responses_to_get.pop()
+ if code not in HttpProcessor.responses:
+ self.send_response(int(code))
+ else:
+ self.send_response(206 if HttpProcessor.allow_range else 200)
+
self.send_header("Content-type", "application/json")
if HttpProcessor.allow_range:
@@ -169,7 +177,7 @@ class HttpProcessor(BaseHTTPRequestHandler):
self.send_head()
def do_GET(self):
- result = self.send_head()
+ result = self.send_head(True)
if result == None:
return
@@ -211,26 +219,36 @@ def start_server():
#####################################################################
-def test_select(download_buffer_size):
+def test_select(settings):
global HTTP_SERVER_URL_STR
- query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS max_download_buffer_size={download_buffer_size};"
+ query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS {','.join((k+'='+repr(v) for k, v in settings.items()))};"
check_answers(query, EXPECTED_ANSWER)
-def run_test(allow_range, download_buffer_size=20):
+def run_test(allow_range, settings, check_retries=False):
HttpProcessor.range_used = False
HttpProcessor.get_call_num = 0
HttpProcessor.allow_range = allow_range
+ if check_retries:
+ HttpProcessor.responses_to_get = ["500", "200", "206"]
+ retries_num = len(HttpProcessor.responses_to_get)
t, httpd = start_server()
t.start()
- test_select(download_buffer_size)
+ test_select(settings)
+ download_buffer_size = settings["max_download_buffer_size"]
expected_get_call_num = (PAYLOAD_LEN - 1) // download_buffer_size + 1
if allow_range:
if not HttpProcessor.range_used:
raise Exception("HTTP Range was not used when supported")
+ if check_retries and len(HttpProcessor.responses_to_get) > 0:
+ raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried")
+
+ if retries_num > 0:
+ expected_get_call_num += retries_num - 1
+
if expected_get_call_num != HttpProcessor.get_call_num:
raise Exception(
f"Invalid amount of GET calls with Range. Expected {expected_get_call_num}, actual {HttpProcessor.get_call_num}"
@@ -245,9 +263,23 @@ def run_test(allow_range, download_buffer_size=20):
def main():
- run_test(allow_range=False)
- run_test(allow_range=True, download_buffer_size=20)
- run_test(allow_range=True, download_buffer_size=10)
+ settings = {"max_download_buffer_size" : 20}
+
+ # Test Accept-Ranges=False
+ run_test(allow_range=False, settings=settings)
+ # Test Accept-Ranges=True, parallel download is used
+ run_test(allow_range=True, settings=settings)
+
+ # Test Accept-Ranges=True, parallel download is used
+ settings = {"max_download_buffer_size" : 10}
+ run_test(allow_range=True, settings=settings)
+
+ # Test Accept-Ranges=True, parallel download is not used,
+ # first get request 500 response,
+ # second get request 200ok response,
+ # third get request (retry) 206 response.
+ settings["max_download_threads"] = 2
+ run_test(allow_range=True, settings=settings, check_retries=True)
if __name__ == "__main__":
diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.reference b/tests/queries/0_stateless/02233_HTTP_ranged.reference
index 17f0fff172a..6164e96afc5 100644
--- a/tests/queries/0_stateless/02233_HTTP_ranged.reference
+++ b/tests/queries/0_stateless/02233_HTTP_ranged.reference
@@ -1,3 +1,4 @@
PASSED
PASSED
PASSED
+PASSED
diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect
new file mode 100755
index 00000000000..20333ae7960
--- /dev/null
+++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect
@@ -0,0 +1,32 @@
+#!/usr/bin/expect -f
+# Tags: long
+
+# This is the regression for the concurrent access in ProgressIndication,
+# so it is important to read enough rows here (10e6).
+#
+# Initially there were 100e6 rows, but under thread fuzzer 10 minutes may sometimes not be enough;
+# I believe that CI will catch possible issues even with fewer rows anyway.
+
+set basedir [file dirname $argv0]
+set basename [file tail $argv0]
+exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
+# Do not echo the spawned session to stdout; each expect waits at most 60 seconds.
+log_user 0
+set timeout 60
+match_max 100000
+set stty_init "rows 25 cols 120"
+# Fallback patterns used after every expect: a timeout ends the script with exit status 1.
+expect_after {
+ eof { exp_continue }
+ timeout { exit 1 }
+}
+# Start bash and load shell_config.sh from the parent directory of this script.
+spawn bash
+send "source $basedir/../shell_config.sh\r"
+# Feed 10000000 lines into the INSERT and wait until the text "Progress: " shows up.
+send "yes | head -n10000000 | \$CLICKHOUSE_CLIENT --query \"insert into function null('foo String') format TSV\" >/dev/null\r"
+expect "Progress: "
+send "\3"
+# The byte sent above is ETX (Ctrl-C on a terminal); afterwards leave the shell and wait for EOF.
+send "exit\r"
+expect eof
diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference
index 64ab61e6765..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference
+++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.reference
@@ -1,2 +0,0 @@
-0
---progress produce some rows
diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh
deleted file mode 100755
index 6c37d870652..00000000000
--- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long
-
-# This is the regression for the concurrent access in ProgressIndication,
-# so it is important to read enough rows here (10e6).
-#
-# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes,
-# but I believe that CI will catch possible issues even with less rows anyway.
-
-CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CUR_DIR"/../shell_config.sh
-
-tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")"
-trap 'rm $tmp_file_progress' EXIT
-
-yes | head -n10000000 | $CLICKHOUSE_CLIENT -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress"
-echo $?
-test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress"
diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect
new file mode 100755
index 00000000000..5c95e17aefd
--- /dev/null
+++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect
@@ -0,0 +1,32 @@
+#!/usr/bin/expect -f
+# Tags: long
+
+# This is the regression for the concurrent access in ProgressIndication,
+# so it is important to read enough rows here (10e6).
+#
+# Initially there were 100e6 rows, but under the thread fuzzer 10 minutes is sometimes not enough;
+# we believe that CI will still catch possible issues even with fewer rows.
+
+set basedir [file dirname $argv0]
+set basename [file tail $argv0]
+exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
+
+log_user 0
+set timeout 60
+match_max 100000
+set stty_init "rows 25 cols 120"
+
+expect_after {
+ eof { exp_continue }
+ timeout { exit 1 }
+}
+
+spawn bash
+send "source $basedir/../shell_config.sh\r"
+
+send "yes | head -n10000000 | \$CLICKHOUSE_LOCAL --query \"insert into function null('foo String') format TSV\" >/dev/null\r"
+expect "Progress: "
+send "\3"
+
+send "exit\r"
+expect eof
diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference
index 64ab61e6765..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference
+++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.reference
@@ -1,2 +0,0 @@
-0
---progress produce some rows
diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh
deleted file mode 100755
index 00a8b7a2a90..00000000000
--- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long
-
-# This is the regression for the concurrent access in ProgressIndication,
-# so it is important to read enough rows here (10e6).
-#
-# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes,
-# but I believe that CI will catch possible issues even with less rows anyway.
-
-CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CUR_DIR"/../shell_config.sh
-
-tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")"
-trap 'rm $tmp_file_progress' EXIT
-
-yes | head -n10000000 | $CLICKHOUSE_LOCAL -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress"
-echo $?
-test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress"
diff --git a/tests/queries/0_stateless/02456_progress_tty.expect b/tests/queries/0_stateless/02456_progress_tty.expect
new file mode 100755
index 00000000000..9daa6caa3fa
--- /dev/null
+++ b/tests/queries/0_stateless/02456_progress_tty.expect
@@ -0,0 +1,55 @@
+#!/usr/bin/expect -f
+
+set basedir [file dirname $argv0]
+set basename [file tail $argv0]
+exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
+
+log_user 0
+set timeout 60
+match_max 100000
+set stty_init "rows 25 cols 120"
+
+expect_after {
+ eof { exp_continue }
+ timeout { exit 1 }
+}
+
+spawn bash
+send "source $basedir/../shell_config.sh\r"
+
+# Progress is displayed by default
+send "\$CLICKHOUSE_LOCAL --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null\r"
+expect "Progress: "
+expect "█"
+send "\3"
+
+# Progress is still displayed even if we redirect both stdout and stderr to /dev/null
+send "\$CLICKHOUSE_LOCAL --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
+expect "Progress: "
+expect "█"
+send "\3"
+
+# The option --progress has an implicit value of true
+send "\$CLICKHOUSE_LOCAL --progress --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
+expect "Progress: "
+expect "█"
+send "\3"
+
+# But we can set it to false
+send "\$CLICKHOUSE_LOCAL --progress false --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r"
+expect -exact "0\tHello\r\n"
+send "\3"
+
+# Setting it to 0 has the same effect
+send "\$CLICKHOUSE_LOCAL --progress 0 --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r"
+expect -exact "0\tHello\r\n"
+send "\3"
+
+# If we set it to 1, the progress will be displayed as well
+send "\$CLICKHOUSE_LOCAL --progress 1 --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
+expect "Progress: "
+expect "█"
+send "\3"
+
+send "exit\r"
+expect eof
diff --git a/tests/queries/0_stateless/02456_progress_tty.reference b/tests/queries/0_stateless/02456_progress_tty.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02475_analysis_of_variance.reference b/tests/queries/0_stateless/02475_analysis_of_variance.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02475_analysis_of_variance.sql b/tests/queries/0_stateless/02475_analysis_of_variance.sql
new file mode 100644
index 00000000000..86996f784ea
--- /dev/null
+++ b/tests/queries/0_stateless/02475_analysis_of_variance.sql
@@ -0,0 +1,10 @@
+
+SELECT analysisOfVariance(number, number % 2) FROM numbers(10) FORMAT Null;
+SELECT analysisOfVariance(number :: Decimal32(5), number % 2) FROM numbers(10) FORMAT Null;
+SELECT analysisOfVariance(number :: Decimal256(5), number % 2) FROM numbers(10) FORMAT Null;
+
+SELECT analysisOfVariance(1.11, -20); -- { serverError BAD_ARGUMENTS }
+SELECT analysisOfVariance(1.11, 20 :: UInt128); -- { serverError BAD_ARGUMENTS }
+SELECT analysisOfVariance(1.11, 9000000000000000); -- { serverError BAD_ARGUMENTS }
+
+SELECT analysisOfVariance(number, number % 2), analysisOfVariance(100000000000000000000., number % 65535) FROM numbers(1048575); -- { serverError BAD_ARGUMENTS }
diff --git a/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference
new file mode 100644
index 00000000000..4ffc8576e57
--- /dev/null
+++ b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.reference
@@ -0,0 +1,2 @@
+bbbbb
+bbbbb
diff --git a/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql
new file mode 100644
index 00000000000..3b2abfb3c42
--- /dev/null
+++ b/tests/queries/0_stateless/02475_bad_cast_low_cardinality_to_string_bug.sql
@@ -0,0 +1 @@
+SELECT if(materialize(0), extract(materialize(CAST('aaaaaa', 'LowCardinality(String)')), '\\w'), extract(materialize(CAST('bbbbb', 'LowCardinality(String)')), '\\w*')) AS res FROM numbers(2);
diff --git a/utils/antlr/README.md b/utils/antlr/README.md
index 50bf34ab432..7d2112e46bf 100644
--- a/utils/antlr/README.md
+++ b/utils/antlr/README.md
@@ -1,3 +1,7 @@
+## This parser is unsupported
+
+We keep it in this repository as a curiosity, but it is not the parser that ClickHouse actually uses.
+
## How to generate source code files from grammar
Grammar is located inside `ClickHouseLexer.g4` and `ClickHouseParser.g4` files.
diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp
index 00785af89f7..e77f8ac02da 100644
--- a/utils/db-generator/query_db_generator.cpp
+++ b/utils/db-generator/query_db_generator.cpp
@@ -59,7 +59,7 @@ std::string randomDate()
int32_t month = rng() % 12 + 1;
int32_t day = rng() % 12 + 1;
char answer[13];
- size_t size = sprintf(answer, "'%04u-%02u-%02u'", year, month, day);
+ size_t size = snprintf(answer, sizeof(answer), "'%04u-%02u-%02u'", year, month, day);
return std::string(answer, size);
}
@@ -72,8 +72,9 @@ std::string randomDatetime()
int32_t minutes = rng() % 60;
int32_t seconds = rng() % 60;
char answer[22];
- size_t size = sprintf(
+ size_t size = snprintf(
answer,
+ sizeof(answer),
"'%04u-%02u-%02u %02u:%02u:%02u'",
year,
month,
diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp
index 7736921a9c6..863019353ee 100644
--- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp
+++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp
@@ -179,7 +179,7 @@ void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int
if (number != current_block_number)
{
char suffix[11] = "";
- size_t size = sprintf(suffix, "%010lld", current_block_number);
+ size_t size = snprintf(suffix, sizeof(suffix), "%010lld", current_block_number);
std::string expected_path = block_prefix + std::string(suffix, size);
std::cerr << "\t" << path_created << ": Ephemeral node has been created with an unexpected path (expected something like "
<< expected_path << ")." << std::endl;