Merge branch 'master' into jepen-multiple-tests

Commit e751557599 by Antonio Andelic, 2022-11-07 08:13:49 +00:00
110 changed files with 2559 additions and 583 deletions


@ -2994,6 +2994,77 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
###################################### SQLANCER FUZZERS ######################################
##############################################################################################
SQLancerTestRelease:
needs: [BuilderDebRelease]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/sqlancer_release
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=SQLancer (release)
REPO_COPY=${{runner.temp}}/sqlancer_release/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: SQLancer
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 sqlancer_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
SQLancerTestDebug:
needs: [BuilderDebDebug]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/sqlancer_debug
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=SQLancer (debug)
REPO_COPY=${{runner.temp}}/sqlancer_debug/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: SQLancer
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 sqlancer_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FinishCheck:
needs:
- DockerHubPush
@ -3053,6 +3124,8 @@ jobs:
- UnitTestsUBsan
- UnitTestsReleaseClang
- SharedBuildSmokeTest
- SQLancerTestRelease
- SQLancerTestDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository


@ -106,7 +106,6 @@ jobs:
if: contains(github.event.pull_request.labels.*.name, 'jepsen-test')
needs: [DockerHubPush]
uses: ./.github/workflows/jepsen.yml
FinishCheck:
needs:
- StyleCheck
@ -181,6 +180,8 @@ jobs:
- SharedBuildSmokeTest
- CompatibilityCheck
- IntegrationTestsFlakyCheck
- SQLancerTestRelease
- SQLancerTestDebug
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository

.gitignore (3 changed lines)

@ -154,3 +154,6 @@ website/package-lock.json
/programs/server/metadata
/programs/server/store
# temporary test files
tests/queries/0_stateless/test_*
tests/queries/0_stateless/*.binary
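A quick way to confirm that the new patterns behave as intended is `git check-ignore`; the file names below are only illustrative examples of what stateless tests may leave behind:
```bash
# Both paths should be reported as ignored, with the matching .gitignore rule shown (-v)
git check-ignore -v \
    tests/queries/0_stateless/test_01234_temporary_file \
    tests/queries/0_stateless/dump.binary
```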


@ -1,5 +1,5 @@
# docker build -t clickhouse/sqlancer-test .
FROM ubuntu:20.04
FROM ubuntu:22.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"


@ -11,13 +11,15 @@ def process_result(result_folder):
summary = []
paths = []
tests = [
"TLPWhere",
"TLPAggregate",
"TLPDistinct",
"TLPGroupBy",
"TLPHaving",
"TLPWhere",
"TLPWhereGroupBy",
"TLPDistinct",
"TLPAggregate",
"NoREC",
]
failed_tests = []
for test in tests:
err_path = "{}/{}.err".format(result_folder, test)
@ -33,15 +35,11 @@ def process_result(result_folder):
with open(err_path, "r") as f:
if "AssertionError" in f.read():
summary.append((test, "FAIL"))
failed_tests.append(test)
status = "failure"
else:
summary.append((test, "OK"))
logs_path = "{}/logs.tar.gz".format(result_folder)
if not os.path.exists(logs_path):
logging.info("No logs tar on path %s", logs_path)
else:
paths.append(logs_path)
stdout_path = "{}/stdout.log".format(result_folder)
if not os.path.exists(stdout_path):
logging.info("No stdout log on path %s", stdout_path)
@ -53,18 +51,23 @@ def process_result(result_folder):
else:
paths.append(stderr_path)
description = "SQLancer test run. See report"
description = "SQLancer run successfully"
if status == "failure":
description = f"Failed oracles: {failed_tests}"
return status, description, summary, paths
def write_results(results_file, status_file, results, status):
def write_results(
results_file, status_file, description_file, results, status, description
):
with open(results_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerows(results)
with open(status_file, "w") as f:
out = csv.writer(f, delimiter="\t")
out.writerow(status)
f.write(status + "\n")
with open(description_file, "w") as f:
f.write(description + "\n")
if __name__ == "__main__":
@ -72,13 +75,20 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="ClickHouse script for parsing results of sqlancer test"
)
parser.add_argument("--in-results-dir", default="/test_output/")
parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
parser.add_argument("--in-results-dir", default="/workspace/")
parser.add_argument("--out-results-file", default="/workspace/summary.tsv")
parser.add_argument("--out-description-file", default="/workspace/description.txt")
parser.add_argument("--out-status-file", default="/workspace/status.txt")
args = parser.parse_args()
state, description, test_results, logs = process_result(args.in_results_dir)
status, description, summary, logs = process_result(args.in_results_dir)
logging.info("Result parsed")
status = (state, description)
write_results(args.out_results_file, args.out_status_file, test_results, status)
write_results(
args.out_results_file,
args.out_status_file,
args.out_description_file,
summary,
status,
description,
)
logging.info("Result written")


@ -1,33 +1,62 @@
#!/bin/bash
set -exu
trap "exit" INT TERM
set -e -x
function wget_with_retry
{
for _ in 1 2 3 4; do
if wget -nv -nd -c "$1";then
return 0
else
sleep 0.5
fi
done
return 1
}
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
if [ -z ${BINARY_URL_TO_DOWNLOAD+x} ]
then
echo "No BINARY_URL_TO_DOWNLOAD provided."
else
wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
chmod +x /clickhouse
fi
service clickhouse-server start && sleep 5
if [[ -f "/clickhouse" ]]; then
echo "/clickhouse exists"
else
exit 1
fi
cd /workspace
/clickhouse server -P /workspace/clickhouse-server.pid -L /workspace/clickhouse-server.log -E /workspace/clickhouse-server.log.err --daemon
for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done
cd /sqlancer/sqlancer-master
export TIMEOUT=300
export NUM_QUERIES=1000
TIMEOUT=300
NUM_QUERIES=1000
NUM_THREADS=10
TESTS=( "TLPGroupBy" "TLPHaving" "TLPWhere" "TLPDistinct" "TLPAggregate" "NoREC" )
echo "${TESTS[@]}"
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere | tee /test_output/TLPWhere.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhere.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPGroupBy | tee /test_output/TLPGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPHaving | tee /test_output/TLPHaving.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPHaving.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPWhere --oracle TLPGroupBy | tee /test_output/TLPWhereGroupBy.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPWhereGroupBy.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPDistinct | tee /test_output/TLPDistinct.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPDistinct.err
( java -jar target/sqlancer-*.jar --num-threads 10 --timeout-seconds $TIMEOUT --num-queries $NUM_QUERIES --username default --password "" clickhouse --oracle TLPAggregate | tee /test_output/TLPAggregate.out ) 3>&1 1>&2 2>&3 | tee /test_output/TLPAggregate.err
for TEST in "${TESTS[@]}"; do
echo "$TEST"
if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]
then
echo "Server is OK"
( java -jar target/sqlancer-*.jar --log-each-select true --print-failed false --num-threads "$NUM_THREADS" --timeout-seconds "$TIMEOUT" --num-queries "$NUM_QUERIES" --username default --password "" clickhouse --oracle "$TEST" | tee "/workspace/$TEST.out" ) 3>&1 1>&2 2>&3 | tee "/workspace/$TEST.err"
else
touch "/workspace/$TEST.err" "/workspace/$TEST.out"
echo "Server is not responding" | tee /workspace/server_crashed.log
fi
done
service clickhouse stop
ls /workspace
pkill -F /workspace/clickhouse-server.pid || true
ls /var/log/clickhouse-server/
tar czf /test_output/logs.tar.gz -C /var/log/clickhouse-server/ .
tail -n 1000 /var/log/clickhouse-server/stderr.log > /test_output/stderr.log
tail -n 1000 /var/log/clickhouse-server/stdout.log > /test_output/stdout.log
tail -n 1000 /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log
for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done
/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
ls /test_output
/process_sqlancer_result.py || echo -e "failure\tCannot parse results" > /workspace/check_status.tsv
ls /workspace
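Putting the pieces together, the updated image can be exercised locally roughly as sketched below, assuming its default command runs the script above as the CI check does; the binary URL and host path are placeholders, not the CI values:
```bash
# Build the image from the directory containing the Dockerfile shown above
docker build -t clickhouse/sqlancer-test .

# Run the oracles against a ClickHouse binary fetched from BINARY_URL_TO_DOWNLOAD;
# results (<oracle>.out/.err, summary.tsv, status.txt, description.txt) land in ./workspace
mkdir -p "$PWD/workspace"
docker run --rm \
    -e BINARY_URL_TO_DOWNLOAD="https://example.com/path/to/clickhouse" \
    -v "$PWD/workspace:/workspace" \
    clickhouse/sqlancer-test
```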


@ -4,25 +4,39 @@ sidebar_label: Cell Towers
sidebar_position: 3
title: "Cell Towers"
---
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md';
This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
## Goal
In this guide you will learn how to:
- Load the OpenCelliD data into ClickHouse
- Connect Apache Superset to ClickHouse
- Build a dashboard based on data available in the dataset
Here is a preview of the dashboard created in this guide:
![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)
## Get the Dataset {#get-the-dataset}
This dataset is from [OpenCelliD](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).
OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign in.
## Get the Dataset {#get-the-dataset}
<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>
### Load the sample data
ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
<ActionsMenu menu="Load Data" />
@ -30,13 +44,33 @@ Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data
![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
Examine the schema of the cell_towers table:
### Examine the schema of the cell_towers table
```sql
DESCRIBE TABLE cell_towers
```
<SQLConsoleDetail />
This is the output of `DESCRIBE`. The field type choices are described further down in this guide.
```response
┌─name──────────┬─type──────────────────────────────────────────────────────────────────┬
│ radio │ Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5) │
│ mcc │ UInt16 │
│ net │ UInt16 │
│ area │ UInt16 │
│ cell │ UInt64 │
│ unit │ Int16 │
│ lon │ Float64 │
│ lat │ Float64 │
│ range │ UInt32 │
│ samples │ UInt32 │
│ changeable │ UInt8 │
│ created │ DateTime │
│ updated │ DateTime │
│ averageSignal │ UInt8 │
└───────────────┴───────────────────────────────────────────────────────────────────────┴
```
</TabItem>
<TabItem value="selfmanaged" label="Self-managed">
@ -86,7 +120,7 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t
</TabItem>
</Tabs>
## Example queries {#examples}
## Run some example queries {#examples}
1. A number of cell towers by type:
@ -127,13 +161,13 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.)
```
So, the top countries are: the USA, Germany, and Russia.
Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia.
You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
## Use case: Incorporate geo data {#use-case}
Using `pointInPolygon` function.
Using the [`pointInPolygon`](/docs/en/sql-reference/functions/geo/coordinates.md/#pointinpolygon) function.
1. Create a table where we will store polygons:
@ -224,6 +258,110 @@ WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow))
1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.)
```
The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=).
## Review of the schema
Although you cannot create temporary tables there.
Before building visualizations in Superset, have a look at the columns that you will use. This dataset primarily provides the location (longitude and latitude) and radio types of mobile cellular towers worldwide. The column descriptions can be found in the [community forum](https://community.opencellid.org/t/documenting-the-columns-in-the-downloadable-cells-database-csv/186). The columns used in the visualizations that will be built are described below.
Here is a description of the columns taken from the OpenCelliD forum:
| Column | Description |
|--------------|--------------------------------------------------------|
| radio | Technology generation: CDMA, GSM, UMTS, 5G NR |
| mcc | Mobile Country Code: `204` is The Netherlands |
| lon | Longitude: With Latitude, approximate tower location |
| lat | Latitude: With Longitude, approximate tower location |
:::tip mcc
To find your MCC, check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code) and use the three digits in the **Mobile country code** column.
:::
The schema for this table was designed for compact storage on disk and query speed.
- The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string.
- `mcc`, or Mobile Country Code, is stored as a `UInt16` since we know the range is 1 - 999.
- `lon` and `lat` are `Float64`.
None of the other fields are used in the queries or visualizations in this guide, but they are described in the forum linked above if you are interested.
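To see what these type choices buy you, the per-column footprint can be checked from the system tables; this is just an illustrative query against the `cell_towers` table loaded earlier:
```bash
# Compressed vs. uncompressed bytes per column, largest first
clickhouse-client --query "
    SELECT
        column,
        formatReadableSize(sum(column_data_compressed_bytes))   AS compressed,
        formatReadableSize(sum(column_data_uncompressed_bytes)) AS uncompressed
    FROM system.parts_columns
    WHERE table = 'cell_towers' AND active
    GROUP BY column
    ORDER BY sum(column_data_compressed_bytes) DESC"
```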
## Build visualizations with Apache Superset
Superset is easy to run from Docker. If you already have Superset running, all you need to do is add ClickHouse Connect with `pip install clickhouse-connect`. If you need to install Superset, open the **Launch Apache Superset in Docker** section directly below.
<SupersetDocker />
To build a Superset dashboard using the OpenCelliD dataset you should:
- Add your ClickHouse service as a Superset **database**
- Add the table **cell_towers** as a Superset **dataset**
- Create some **charts**
- Add the charts to a **dashboard**
### Add your ClickHouse service as a Superset database
<ConnectionDetails />
In Superset a database can be added by choosing the database type and then providing the connection details. Open Superset and look for the **+**; it has a menu with **Data** and then **Connect database** options.
![Add a database](@site/docs/en/getting-started/example-datasets/images/superset-add.png)
Choose **ClickHouse Connect** from the list:
![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)
:::note
If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
:::
#### Add your connection details:
:::tip
Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL.
:::
![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png)
### Add the table **cell_towers** as a Superset **dataset**
In Superset a **dataset** maps to a table within a database. Click on add a dataset, then choose your ClickHouse service, the database containing your table (`default`), and the `cell_towers` table:
![Add cell_towers table as a dataset](@site/docs/en/getting-started/example-datasets/images/superset-add-dataset.png)
### Create some **charts**
When you choose to add a chart in Superset you have to specify the dataset (`cell_towers`) and the chart type. Since the OpenCelliD dataset provides longitude and latitude coordinates for cell towers, we will create a **Map** chart. The **deck.gl Scatterplot** type is suited to this dataset, as it works well with dense data points on a map.
![Create a map in Superset](@site/docs/en/getting-started/example-datasets/images/superset-create-map.png)
#### Specify the query used for the map
A deck.gl Scatterplot requires a longitude and latitude, and one or more filters can also be applied to the query. In this example two filters are applied, one for cell towers with UMTS radios, and one for the Mobile country code assigned to The Netherlands.
The fields `lon` and `lat` contain the longitude and latitude:
![Specify longitude and latitude fields](@site/docs/en/getting-started/example-datasets/images/superset-lon-lat.png)
Add a filter with `mcc` = `204` (or substitute any other `mcc` value):
![Filter on MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-mcc-204.png)
Add a filter with `radio` = `'UMTS'` (or substitute any other `radio` value; you can see the choices in the output of `DESCRIBE TABLE cell_towers`):
![Filter on radio = UMTS](@site/docs/en/getting-started/example-datasets/images/superset-radio-umts.png)
This is the full configuration for the chart that filters on `radio = 'UMTS'` and `mcc = 204`:
![Chart for UMTS radios in MCC 204](@site/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png)
Click on **UPDATE CHART** to render the visualization.
### Add the charts to a **dashboard**
This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The charts are all created in the same way and they are added to a dashboard.
![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png)
:::tip
The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play).
This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you.
Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number).
:::

Binary files not shown: 10 new image files added (sizes from 12 KiB to 475 KiB).


@ -4,7 +4,7 @@ sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---
RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
## Download and Unpack the Dataset


@ -8,70 +8,69 @@ title: "Geo Functions"
## Geographical Coordinates Functions
- [greatCircleDistance](./coordinates.md#greatCircleDistance)
- [geoDistance](./coordinates.md#geoDistance)
- [greatCircleAngle](./coordinates.md#greatCircleAngle)
- [pointInEllipses](./coordinates.md#pointInEllipses)
- [pointInPolygon](./coordinates.md#pointInPolygon)
- [greatCircleDistance](./coordinates.md#greatcircledistance)
- [geoDistance](./coordinates.md#geodistance)
- [greatCircleAngle](./coordinates.md#greatcircleangle)
- [pointInEllipses](./coordinates.md#pointinellipses)
- [pointInPolygon](./coordinates.md#pointinpolygon)
## Geohash Functions
- [geohashEncode](./geohash.md#geohashEncode)
- [geohashDecode](./geohash.md#geohashDecode)
- [geohashesInBox](./geohash.md#geohashesInBox)
- [geohashEncode](./geohash.md#geohashencode)
- [geohashDecode](./geohash.md#geohashdecode)
- [geohashesInBox](./geohash.md#geohashesinbox)
## H3 Indexes Functions
- [h3IsValid](./h3.md#h3IsValid)
- [h3GetResolution](./h3.md#h3GetResolution)
- [h3EdgeAngle](./h3.md#h3EdgeAngle)
- [h3EdgeLengthM](./h3.md#h3EdgeLengthM)
- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm)
- [geoToH3](./h3.md#geoToH3)
- [h3ToGeo](./h3.md#h3ToGeo)
- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary)
- [h3kRing](./h3.md#h3kRing)
- [h3GetBaseCell](./h3.md#h3GetBaseCell)
- [h3HexAreaM2](./h3.md#h3HexAreaM2)
- [h3HexAreaKm2](./h3.md#h3HexAreaKm2)
- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors)
- [h3ToChildren](./h3.md#h3ToChildren)
- [h3ToParent](./h3.md#h3ToParent)
- [h3ToString](./h3.md#h3ToString)
- [stringToH3](./h3.md#stringToH3)
- [h3GetResolution](./h3.md#h3GetResolution)
- [h3IsResClassIII](./h3.md#h3IsResClassIII)
- [h3IsPentagon](./h3.md#h3IsPentagon)
- [h3GetFaces](./h3.md#h3GetFaces)
- [h3CellAreaM2](./h3.md#h3CellAreaM2)
- [h3CellAreaRads2](./h3.md#h3CellAreaRads2)
- [h3ToCenterChild](./h3.md#h3ToCenterChild)
- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM)
- [h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm)
- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads)
- [h3NumHexagons](./h3.md#h3NumHexagons)
- [h3Line](./h3.md#h3Line)
- [h3Distance](./h3.md#h3Distance)
- [h3HexRing](./h3.md#h3HexRing)
- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge)
- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid)
- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge)
- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge)
- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge)
- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon)
- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary)
- [h3IsValid](./h3.md#h3isvalid)
- [h3GetResolution](./h3.md#h3getresolution)
- [h3EdgeAngle](./h3.md#h3edgeangle)
- [h3EdgeLengthM](./h3.md#h3edgelengthm)
- [h3EdgeLengthKm](./h3.md#h3edgelengthkm)
- [geoToH3](./h3.md#geotoh3)
- [h3ToGeo](./h3.md#h3togeo)
- [h3ToGeoBoundary](./h3.md#h3togeoboundary)
- [h3kRing](./h3.md#h3kring)
- [h3GetBaseCell](./h3.md#h3getbasecell)
- [h3HexAreaM2](./h3.md#h3hexaream2)
- [h3HexAreaKm2](./h3.md#h3hexareakm2)
- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors)
- [h3ToChildren](./h3.md#h3tochildren)
- [h3ToParent](./h3.md#h3toparent)
- [h3ToString](./h3.md#h3tostring)
- [stringToH3](./h3.md#stringtoh3)
- [h3GetResolution](./h3.md#h3getresolution)
- [h3IsResClassIII](./h3.md#h3isresclassiii)
- [h3IsPentagon](./h3.md#h3ispentagon)
- [h3GetFaces](./h3.md#h3getfaces)
- [h3CellAreaM2](./h3.md#h3cellaream2)
- [h3CellAreaRads2](./h3.md#h3cellarearads2)
- [h3ToCenterChild](./h3.md#h3tocenterchild)
- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm)
- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm)
- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads)
- [h3NumHexagons](./h3.md#h3numhexagons)
- [h3Line](./h3.md#h3line)
- [h3Distance](./h3.md#h3distance)
- [h3HexRing](./h3.md#h3hexring)
- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge)
- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid)
- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge)
- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge)
- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge)
- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon)
- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary)
## S2 Index Functions
- [geoToS2](./s2.md#geoToS2)
- [s2ToGeo](./s2.md#s2ToGeo)
- [s2GetNeighbors](./s2.md#s2GetNeighbors)
- [s2CellsIntersect](./s2.md#s2CellsIntersect)
- [s2CapContains](./s2.md#s2CapContains)
- [s2CapUnion](./s2.md#s2CapUnion)
- [s2RectAdd](./s2.md#s2RectAdd)
- [s2RectContains](./s2.md#s2RectContains)
- [s2RectUinion](./s2.md#s2RectUinion)
- [s2RectIntersection](./s2.md#s2RectIntersection)
- [geoToS2](./s2.md#geotos2)
- [s2ToGeo](./s2.md#s2togeo)
- [s2GetNeighbors](./s2.md#s2getneighbors)
- [s2CellsIntersect](./s2.md#s2cellsintersect)
- [s2CapContains](./s2.md#s2capcontains)
- [s2CapUnion](./s2.md#s2capunion)
- [s2RectAdd](./s2.md#s2rectadd)
- [s2RectContains](./s2.md#s2rectcontains)
- [s2RectUnion](./s2.md#s2rectunion)
- [s2RectIntersection](./s2.md#s2rectintersection)
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/) <!--hide-->


@ -593,6 +593,27 @@ LIMIT 10
└────────────────┴─────────┘
```
## formatReadableDecimalSize(x)
Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
Example:
``` sql
SELECT
arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
formatReadableDecimalSize(filesize_bytes) AS filesize
```
``` text
┌─filesize_bytes─┬─filesize───┐
│ 1 │ 1.00 B │
│ 1024 │ 1.02 KB │
│ 1048576 │ 1.05 MB │
│ 192851925 │ 192.85 MB │
└────────────────┴────────────┘
```
## formatReadableSize(x)
Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string.
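Since the two functions differ only in their base (1000 vs. 1024), a quick side-by-side comparison makes the suffixes easy to tell apart; this sketch assumes a server recent enough to have `formatReadableDecimalSize`:
```bash
clickhouse-client --query "
    SELECT
        formatReadableDecimalSize(192851925) AS decimal_suffix,  -- 192.85 MB
        formatReadableSize(192851925)        AS binary_suffix    -- 183.92 MiB
"
```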


@ -1088,7 +1088,8 @@ void Client::processConfig()
}
else
{
need_render_progress = config().getBool("progress", false);
std::string progress = config().getString("progress", "tty");
need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().getBool("echo", false);
ignore_error = config().getBool("ignore-error", false);


@ -489,7 +489,8 @@ void LocalServer::processConfig()
}
else
{
need_render_progress = config().getBool("progress", false);
std::string progress = config().getString("progress", "tty");
need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
is_multiquery = true;
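With `progress` now read as a string rather than a bool in both `clickhouse-client` and `clickhouse-local`, the value selects where, or whether, the progress bar is rendered. A few invocations this is meant to support, per the option help added later in this commit:
```bash
# Default behaviour: draw progress on the controlling terminal (tty)
clickhouse-client --progress --query "SELECT count() FROM system.numbers LIMIT 100000000"

# Send progress to stderr so stdout stays clean for piping
clickhouse-local --progress=err --query "SELECT 1" > result.tsv

# Disable progress rendering entirely (off, no, false and 0 are equivalent)
clickhouse-client --progress=off --query "SELECT 1"
```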


@ -1336,17 +1336,13 @@
name - name for the rule (optional)
regexp - RE2 compatible regular expression (mandatory)
replace - substitution string for sensitive data (optional, by default - six asterisks)
-->
<query_masking_rules>
<rule>
<name>hide encrypt/decrypt arguments</name>
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
<!-- or more secure, but also more invasive:
(aes_\w+)\s*\(.*\)
-->
<replace>\1(???)</replace>
</rule>
</query_masking_rules>
</query_masking_rules> -->
<!-- Uncomment to use custom http handlers.
rules are checked from top to bottom, first match runs the handler
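For context, this is the kind of query the (now fully commented-out) example rule would mask if it were enabled; the key literal is a placeholder, and with the rule active the arguments are replaced with `(???)` in query logs:
```bash
# The literal mode, plaintext and key below would be hidden by the example masking rule
clickhouse-client --query \
    "SELECT hex(encrypt('aes-256-cbc', 'sensitive plaintext', 'keykeykeykeykeykeykeykeykeykey12'))"
```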


@ -138,12 +138,12 @@ private:
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::READ)
@ -158,7 +158,7 @@ BackupImpl::BackupImpl(
BackupImpl::BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -166,7 +166,7 @@ BackupImpl::BackupImpl(
bool is_internal_backup_,
const std::shared_ptr<IBackupCoordination> & coordination_,
const std::optional<UUID> & backup_uuid_)
: backup_name(backup_name_)
: backup_name_for_logging(backup_name_for_logging_)
, archive_params(archive_params_)
, use_archives(!archive_params.archive_name.empty())
, open_mode(OpenMode::WRITE)
@ -225,10 +225,19 @@ void BackupImpl::open(const ContextPtr & context)
base_backup = BackupFactory::instance().createBackup(params);
if (open_mode == OpenMode::WRITE)
{
base_backup_uuid = base_backup->getUUID();
}
else if (base_backup_uuid != base_backup->getUUID())
throw Exception(ErrorCodes::WRONG_BASE_BACKUP, "Backup {}: The base backup {} has different UUID ({} != {})",
backup_name, base_backup->getName(), toString(base_backup->getUUID()), (base_backup_uuid ? toString(*base_backup_uuid) : ""));
{
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: The base backup {} has different UUID ({} != {})",
backup_name_for_logging,
base_backup->getNameForLogging(),
toString(base_backup->getUUID()),
(base_backup_uuid ? toString(*base_backup_uuid) : ""));
}
}
}
@ -349,14 +358,14 @@ void BackupImpl::readBackupMetadata()
if (use_archives)
{
if (!reader->fileExists(archive_params.archive_name))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
setCompressedSize();
in = getArchiveReader("")->readFile(".backup");
}
else
{
if (!reader->fileExists(".backup"))
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name);
throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", backup_name_for_logging);
in = reader->readFile(".backup");
}
@ -369,7 +378,8 @@ void BackupImpl::readBackupMetadata()
version = config->getInt("version");
if ((version < INITIAL_BACKUP_VERSION) || (version > CURRENT_BACKUP_VERSION))
throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name, version);
throw Exception(
ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", backup_name_for_logging, version);
timestamp = parse<LocalDateTime>(config->getString("timestamp")).to_time_t();
uuid = parse<UUID>(config->getString("uuid"));
@ -400,7 +410,13 @@ void BackupImpl::readBackupMetadata()
use_base = true;
if (info.base_size > info.size)
throw Exception(ErrorCodes::BACKUP_DAMAGED, "Backup {}: Base size must not be greater than the size of entry {}", backup_name, quoteString(info.file_name));
{
throw Exception(
ErrorCodes::BACKUP_DAMAGED,
"Backup {}: Base size must not be greater than the size of entry {}",
backup_name_for_logging,
quoteString(info.file_name));
}
if (use_base)
{
@ -436,14 +452,14 @@ void BackupImpl::checkBackupDoesntExist() const
file_name_to_check_existence = ".backup";
if (writer->fileExists(file_name_to_check_existence))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", backup_name_for_logging);
/// Check that no other backup (excluding internal backups) is writing to the same destination.
if (!is_internal_backup)
{
assert(!lock_file_name.empty());
if (writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} is being written already", backup_name_for_logging);
}
}
@ -466,8 +482,16 @@ bool BackupImpl::checkLockFile(bool throw_if_failed) const
if (throw_if_failed)
{
if (!writer->fileExists(lock_file_name))
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Lock file {} suddenly disappeared while writing backup {}", lock_file_name, backup_name);
throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name);
{
throw Exception(
ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE,
"Lock file {} suddenly disappeared while writing backup {}",
lock_file_name,
backup_name_for_logging);
}
throw Exception(
ErrorCodes::BACKUP_ALREADY_EXISTS, "A concurrent backup writing to the same destination {} detected", backup_name_for_logging);
}
return false;
}
@ -514,8 +538,13 @@ UInt64 BackupImpl::getFileSize(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->size;
}
@ -525,8 +554,13 @@ UInt128 BackupImpl::getFileChecksum(const String & file_name) const
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return info->checksum;
}
@ -536,8 +570,13 @@ SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) con
auto adjusted_path = removeLeadingSlash(file_name);
auto info = coordination->getFileInfo(adjusted_path);
if (!info)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
quoteString(file_name));
}
return {info->size, info->checksum};
}
@ -560,8 +599,13 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
auto info_opt = coordination->getFileInfo(size_and_checksum);
if (!info_opt)
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", backup_name, formatSizeAndChecksum(size_and_checksum));
ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
"Backup {}: Entry {} not found in the backup",
backup_name_for_logging,
formatSizeAndChecksum(size_and_checksum));
}
const auto & info = *info_opt;
@ -577,7 +621,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::NO_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
if (!base_backup->fileExists(std::pair(info.base_size, info.base_checksum)))
@ -585,7 +629,7 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
throw Exception(
ErrorCodes::WRONG_BASE_BACKUP,
"Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
backup_name, formatSizeAndChecksum(size_and_checksum));
backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
}
auto base_entry = base_backup->readFile(std::pair{info.base_size, info.base_checksum});
@ -695,9 +739,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
LOG_TRACE(log, "Writing backup for file {} from {}", file_name, from_file_name);
auto adjusted_path = removeLeadingSlash(file_name);
if (coordination->getFileInfo(adjusted_path))
{
throw Exception(
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name, quoteString(file_name));
ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", backup_name_for_logging, quoteString(file_name));
}
FileInfo info
{
@ -893,12 +940,12 @@ void BackupImpl::finalizeWriting()
if (!is_internal_backup)
{
LOG_TRACE(log, "Finalizing backup {}", backup_name);
LOG_TRACE(log, "Finalizing backup {}", backup_name_for_logging);
writeBackupMetadata();
closeArchives();
setCompressedSize();
removeLockFile();
LOG_TRACE(log, "Finalized backup {}", backup_name);
LOG_TRACE(log, "Finalized backup {}", backup_name_for_logging);
}
writing_finalized = true;
@ -971,7 +1018,7 @@ void BackupImpl::removeAllFilesAfterFailure()
try
{
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name);
LOG_INFO(log, "Removing all files of backup {} after failure", backup_name_for_logging);
Strings files_to_remove;
if (use_archives)


@ -35,14 +35,14 @@ public:
};
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupReader> reader_,
const ContextPtr & context_);
BackupImpl(
const String & backup_name_,
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
const std::optional<BackupInfo> & base_backup_info_,
std::shared_ptr<IBackupWriter> writer_,
@ -53,7 +53,7 @@ public:
~BackupImpl() override;
const String & getName() const override { return backup_name; }
const String & getNameForLogging() const override { return backup_name_for_logging; }
OpenMode getOpenMode() const override { return open_mode; }
time_t getTimestamp() const override { return timestamp; }
UUID getUUID() const override { return *uuid; }
@ -107,7 +107,7 @@ private:
/// Calculates and sets `compressed_size`.
void setCompressedSize();
const String backup_name;
const String backup_name_for_logging;
const ArchiveParams archive_params;
const bool use_archives;
const OpenMode open_mode;


@ -6,6 +6,7 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
namespace DB
@ -92,4 +93,10 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
String BackupInfo::toStringForLogging(const ContextPtr & context) const
{
ASTPtr ast = toAST();
return maskSensitiveInfoInBackupNameForLogging(serializeAST(*ast), ast, context);
}
}
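`toStringForLogging` exists so that credentials embedded in a backup destination are masked before the name reaches log messages and the backup status info. The kind of statement this protects looks roughly like the following; the table, bucket, key and secret are placeholders:
```bash
clickhouse-client --query "
    BACKUP TABLE default.cell_towers
    TO S3('https://my-bucket.s3.amazonaws.com/backups/cell_towers',
          'EXAMPLE_ACCESS_KEY_ID', 'EXAMPLE_SECRET_ACCESS_KEY')"
```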


@ -1,6 +1,7 @@
#pragma once
#include <Core/Field.h>
#include <Interpreters/Context_fwd.h>
namespace DB
@ -20,6 +21,8 @@ struct BackupInfo
ASTPtr toAST() const;
static BackupInfo fromAST(const IAST & ast);
String toStringForLogging(const ContextPtr & context) const;
};
}


@ -166,9 +166,10 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
String backup_name_for_logging = backup_info.toStringForLogging(context);
try
{
addInfo(backup_id, backup_info.toString(), backup_settings.internal, BackupStatus::CREATING_BACKUP);
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::CREATING_BACKUP);
/// Prepare context to use.
ContextPtr context_in_use = context;
@ -184,13 +185,14 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
if (backup_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, backup_query, backup_id, backup_settings, backup_info, backup_coordination, context_in_use, mutable_context]
[this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context]
{
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -202,8 +204,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
doBackup(
backup_query,
backup_id,
backup_settings,
backup_name_for_logging,
backup_info,
backup_settings,
backup_coordination,
context_in_use,
mutable_context,
@ -214,7 +217,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
}
catch (...)
{
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
@ -226,8 +229,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
void BackupsWorker::doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
BackupSettings backup_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
@ -336,7 +340,7 @@ void BackupsWorker::doBackup(
/// Close the backup.
backup.reset();
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_info.toString());
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
setNumFilesAndSize(backup_id, num_files, uncompressed_size, compressed_size);
}
@ -345,7 +349,7 @@ void BackupsWorker::doBackup(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
}
@ -384,7 +388,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
try
{
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
addInfo(restore_id, backup_info.toString(), restore_settings.internal, BackupStatus::RESTORING);
String backup_name_for_logging = backup_info.toStringForLogging(context);
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
/// Prepare context to use.
ContextMutablePtr context_in_use = context;
@ -399,12 +404,14 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
if (restore_settings.async)
{
backups_thread_pool.scheduleOrThrowOnError(
[this, restore_query, restore_id, restore_settings, backup_info, restore_coordination, context_in_use] {
[this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use]
{
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ true);
@ -415,8 +422,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
doRestore(
restore_query,
restore_id,
restore_settings,
backup_name_for_logging,
backup_info,
restore_settings,
restore_coordination,
context_in_use,
/* called_async= */ false);
@ -437,8 +445,9 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
void BackupsWorker::doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
RestoreSettings restore_settings,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async)
@ -541,7 +550,7 @@ void BackupsWorker::doRestore(
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString());
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
setStatus(restore_id, BackupStatus::RESTORED);
}
catch (...)
@ -549,7 +558,7 @@ void BackupsWorker::doRestore(
/// Something bad happened, the backup has not built.
if (called_async)
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_info.toString()));
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
}


@ -76,14 +76,28 @@ public:
private:
OperationID startMakingBackup(const ASTPtr & query, const ContextPtr & context);
void doBackup(const std::shared_ptr<ASTBackupQuery> & backup_query, const OperationID & backup_id, BackupSettings backup_settings,
const BackupInfo & backup_info, std::shared_ptr<IBackupCoordination> backup_coordination, const ContextPtr & context,
ContextMutablePtr mutable_context, bool called_async);
void doBackup(
const std::shared_ptr<ASTBackupQuery> & backup_query,
const OperationID & backup_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
BackupSettings backup_settings,
std::shared_ptr<IBackupCoordination> backup_coordination,
const ContextPtr & context,
ContextMutablePtr mutable_context,
bool called_async);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(const std::shared_ptr<ASTBackupQuery> & restore_query, const OperationID & restore_id, RestoreSettings restore_settings, const BackupInfo & backup_info,
std::shared_ptr<IRestoreCoordination> restore_coordination, ContextMutablePtr context, bool called_async);
void doRestore(
const std::shared_ptr<ASTBackupQuery> & restore_query,
const OperationID & restore_id,
const String & backup_name_for_logging,
const BackupInfo & backup_info,
RestoreSettings restore_settings,
std::shared_ptr<IRestoreCoordination> restore_coordination,
ContextMutablePtr context,
bool called_async);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);


@ -19,7 +19,8 @@ public:
virtual ~IBackup() = default;
/// Name of the backup.
virtual const String & getName() const = 0;
//virtual const String & getName() const = 0;
virtual const String & getNameForLogging() const = 0;
enum class OpenMode
{


@ -47,7 +47,7 @@ void registerBackupEngineS3(BackupFactory & factory)
auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
#if USE_AWS_S3
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
@ -111,12 +111,12 @@ void registerBackupEngineS3(BackupFactory & factory)
if (params.open_mode == IBackup::OpenMode::READ)
{
auto reader = std::make_shared<BackupReaderS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
auto writer = std::make_shared<BackupWriterS3>(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
#else
throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED);


@ -99,7 +99,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
{
auto creator_fn = [](const BackupFactory::CreateParams & params) -> std::unique_ptr<IBackup>
{
String backup_name = params.backup_info.toString();
String backup_name_for_logging = params.backup_info.toStringForLogging(params.context);
const String & engine_name = params.backup_info.backup_engine_name;
if (!params.backup_info.id_arg.empty())
@ -172,7 +172,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
reader = std::make_shared<BackupReaderFile>(path);
else
reader = std::make_shared<BackupReaderDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, reader, params.context);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
}
else
{
@ -181,7 +181,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
writer = std::make_shared<BackupWriterFile>(path);
else
writer = std::make_shared<BackupWriterDisk>(disk, path);
return std::make_unique<BackupImpl>(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid);
}
};


@ -65,10 +65,12 @@
#include <Interpreters/ReplaceQueryParameterVisitor.h>
#include <Interpreters/ProfileEventsExt.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/CompressionMethod.h>
#include <Client/InternalTextLogs.h>
#include <IO/ForkWriteBuffer.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace fs = std::filesystem;
@ -103,6 +105,7 @@ namespace ErrorCodes
extern const int CANNOT_SET_SIGNAL_HANDLER;
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_OPEN_FILE;
}
}
@ -116,6 +119,25 @@ namespace ProfileEvents
namespace DB
{
std::istream& operator>> (std::istream & in, ProgressOption & progress)
{
std::string token;
in >> token;
boost::to_upper(token);
if (token == "OFF" || token == "FALSE" || token == "0" || token == "NO")
progress = ProgressOption::OFF;
else if (token == "TTY" || token == "ON" || token == "TRUE" || token == "1" || token == "YES")
progress = ProgressOption::TTY;
else if (token == "ERR")
progress = ProgressOption::ERR;
else
throw boost::program_options::validation_error(boost::program_options::validation_error::invalid_option_value);
return in;
}
static ClientInfo::QueryKind parseQueryKind(const String & query_kind)
{
if (query_kind == "initial_query")
@ -413,8 +435,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && (stdout_is_a_tty || is_interactive) && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput(*tty_buf);
try
{
@ -431,11 +453,11 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
output_format->flush();
/// Restore progress bar after data block.
if (need_render_progress && (stdout_is_a_tty || is_interactive))
if (need_render_progress && tty_buf)
{
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
progress_indication.writeProgress();
progress_indication.writeProgress(*tty_buf);
}
}
@ -443,7 +465,8 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
void ClientBase::onLogData(Block & block)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeLogs(block);
logs_out_stream->flush();
}
@ -639,6 +662,58 @@ void ClientBase::initLogsOutputStream()
}
}
void ClientBase::initTtyBuffer(bool to_err)
{
if (!tty_buf)
{
static constexpr auto tty_file_name = "/dev/tty";
/// Output all progress bar commands to terminal at once to avoid flicker.
/// This size is usually greater than the window size.
static constexpr size_t buf_size = 1024;
if (!to_err)
{
std::error_code ec;
std::filesystem::file_status tty = std::filesystem::status(tty_file_name, ec);
if (!ec && exists(tty) && is_character_file(tty)
&& (tty.permissions() & std::filesystem::perms::others_write) != std::filesystem::perms::none)
{
try
{
tty_buf = std::make_unique<WriteBufferFromFile>(tty_file_name, buf_size);
/// It is possible that the terminal file has writeable permissions
/// but we cannot write anything there. Check it with invisible character.
tty_buf->write('\0');
tty_buf->next();
return;
}
catch (const Exception & e)
{
if (tty_buf)
tty_buf.reset();
if (e.code() != ErrorCodes::CANNOT_OPEN_FILE)
throw;
/// It is normal if the file exists and is reported as writeable but still cannot be opened.
/// Fallback to other options.
}
}
}
if (stderr_is_a_tty)
{
tty_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDERR_FILENO, buf_size);
}
else
need_render_progress = false;
}
}
void ClientBase::updateSuggest(const ASTPtr & ast)
{
std::vector<std::string> new_words;
@ -937,14 +1012,15 @@ void ClientBase::onProgress(const Progress & value)
if (output_format)
output_format->onProgress(value);
if (need_render_progress)
progress_indication.writeProgress();
if (need_render_progress && tty_buf)
progress_indication.writeProgress(*tty_buf);
}
void ClientBase::onEndOfStream()
{
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
if (output_format)
output_format->finalize();
@ -952,10 +1028,7 @@ void ClientBase::onEndOfStream()
resetOutput();
if (is_interactive && !written_first_block)
{
progress_indication.clearProgressOutput();
std::cout << "Ok." << std::endl;
}
}
@ -998,15 +1071,16 @@ void ClientBase::onProfileEvents(Block & block)
}
progress_indication.updateThreadEventData(thread_times);
if (need_render_progress)
progress_indication.writeProgress();
if (need_render_progress && tty_buf)
progress_indication.writeProgress(*tty_buf);
if (profile_events.print)
{
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeProfileEvents(block);
logs_out_stream->flush();
@ -1180,7 +1254,8 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
progress_indication.updateProgress(Progress(file_progress));
/// Set callback to be called on file progress.
progress_indication.setFileProgressCallback(global_context, true);
if (tty_buf)
progress_indication.setFileProgressCallback(global_context, *tty_buf);
}
/// If data fetched from file (maybe compressed file)
@ -1432,12 +1507,12 @@ bool ClientBase::receiveEndOfQuery()
void ClientBase::cancelQuery()
{
connection->sendCancel();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
if (is_interactive)
{
progress_indication.clearProgressOutput();
std::cout << "Cancelling query." << std::endl;
}
cancelled = true;
}
@ -1557,7 +1632,8 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (profile_events.last_block)
{
initLogsOutputStream();
progress_indication.clearProgressOutput();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
logs_out_stream->writeProfileEvents(profile_events.last_block);
logs_out_stream->flush();
@ -2248,7 +2324,7 @@ void ClientBase::init(int argc, char ** argv)
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
("query_id", po::value<std::string>(), "query_id")
("progress", "print progress of queries execution")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::TTY, "tty"), "Print progress of queries execution - to TTY (default): tty|on|1|true|yes; to STDERR: err; OFF: off|0|false|no")
("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
@ -2303,6 +2379,11 @@ void ClientBase::init(int argc, char ** argv)
parseAndCheckOptions(options_description, options, common_arguments);
po::notify(options);
if (options["progress"].as<ProgressOption>() == ProgressOption::OFF)
need_render_progress = false;
else
initTtyBuffer(options["progress"].as<ProgressOption>() == ProgressOption::ERR);
if (options.count("version") || options.count("V"))
{
showClientVersion();
@ -2353,7 +2434,20 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("profile-events-delay-ms"))
config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
if (options.count("progress"))
config().setBool("progress", true);
{
switch (options["progress"].as<ProgressOption>())
{
case OFF:
config().setString("progress", "off");
break;
case TTY:
config().setString("progress", "tty");
break;
case ERR:
config().setString("progress", "err");
break;
}
}
if (options.count("echo"))
config().setBool("echo", true);
if (options.count("disable_suggestion"))

View File

@ -15,6 +15,7 @@
#include <Storages/StorageFile.h>
#include <Storages/SelectQueryInfo.h>
namespace po = boost::program_options;
@ -35,9 +36,18 @@ enum MultiQueryProcessingStage
PARSING_FAILED,
};
enum ProgressOption
{
OFF,
TTY,
ERR,
};
std::istream& operator>> (std::istream & in, ProgressOption & progress);
void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
{
@ -143,6 +153,7 @@ private:
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();
void initTtyBuffer(bool to_err = false);
String prompt() const;
@ -218,6 +229,10 @@ protected:
String server_logs_file;
std::unique_ptr<InternalTextLogs> logs_out_stream;
/// /dev/tty if accessible, otherwise std::cerr - for the progress bar.
/// We prefer to output the progress bar directly to the tty so that the user can redirect stdout and stderr and still get the progress indication.
std::unique_ptr<WriteBufferFromFileDescriptor> tty_buf;
String home_path;
String history_file; /// Path to a file containing command history.

View File

@ -2,6 +2,7 @@
#include <algorithm>
#include <cstddef>
#include <numeric>
#include <filesystem>
#include <cmath>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <base/types.h>
@ -11,6 +12,9 @@
#include "IO/WriteBufferFromString.h"
#include <Databases/DatabaseMemory.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
#define CLEAR_TO_END_OF_LINE "\033[K"
namespace
{
@ -44,15 +48,6 @@ bool ProgressIndication::updateProgress(const Progress & value)
return progress.incrementPiecewiseAtomically(value);
}
void ProgressIndication::clearProgressOutput()
{
if (written_progress_chars)
{
written_progress_chars = 0;
std::cerr << "\r" CLEAR_TO_END_OF_LINE;
}
}
void ProgressIndication::resetProgress()
{
watch.restart();
@ -67,15 +62,12 @@ void ProgressIndication::resetProgress()
}
}
void ProgressIndication::setFileProgressCallback(ContextMutablePtr context, bool write_progress_on_update_)
void ProgressIndication::setFileProgressCallback(ContextMutablePtr context, WriteBufferFromFileDescriptor & message)
{
write_progress_on_update = write_progress_on_update_;
context->setFileProgressCallback([&](const FileProgress & file_progress)
{
progress.incrementPiecewiseAtomically(Progress(file_progress));
if (write_progress_on_update)
writeProgress();
writeProgress(message);
});
}
@ -142,13 +134,10 @@ void ProgressIndication::writeFinalProgress()
std::cout << ". ";
}
void ProgressIndication::writeProgress()
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
{
std::lock_guard lock(progress_mutex);
/// Output all progress bar commands to stderr at once to avoid flicker.
WriteBufferFromFileDescriptor message(STDERR_FILENO, 1024);
static size_t increment = 0;
static const char * indicators[8] = {
"\033[1;30m→\033[0m",
@ -307,4 +296,14 @@ void ProgressIndication::writeProgress()
message.next();
}
void ProgressIndication::clearProgressOutput(WriteBufferFromFileDescriptor & message)
{
if (written_progress_chars)
{
written_progress_chars = 0;
message << "\r" CLEAR_TO_END_OF_LINE;
message.next();
}
}
}
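
The escape sequence involved can be tried in isolation. The sketch below writes a fake progress line to stderr and then erases it with '\r' followed by the CLEAR_TO_END_OF_LINE sequence; the one-second pause exists only to make the effect visible.

#include <chrono>
#include <cstdio>
#include <thread>

int main()
{
    std::fprintf(stderr, "Progress: 42%%");
    std::fflush(stderr);
    std::this_thread::sleep_for(std::chrono::seconds(1));

    // '\r' returns the cursor to column 0, "\033[K" erases to the end of the line,
    // so the next write starts on a visually clean line.
    std::fprintf(stderr, "\r\033[K");
    std::fprintf(stderr, "Done.\n");
}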

View File

@ -9,12 +9,12 @@
#include <Common/Stopwatch.h>
#include <Common/EventRateMeter.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code
#define CLEAR_TO_END_OF_LINE "\033[K"
namespace DB
{
class WriteBufferFromFileDescriptor;
struct ThreadEventData
{
UInt64 time() const noexcept { return user_ms + system_ms; }
@ -30,14 +30,13 @@ using HostToThreadTimesMap = std::unordered_map<String, ThreadIdToTimeMap>;
class ProgressIndication
{
public:
/// Write progress to stderr.
void writeProgress();
/// Write progress bar.
void writeProgress(WriteBufferFromFileDescriptor & message);
void clearProgressOutput(WriteBufferFromFileDescriptor & message);
/// Write summary.
void writeFinalProgress();
/// Clear stderr output.
void clearProgressOutput();
/// Reset progress values.
void resetProgress();
@ -52,7 +51,7 @@ public:
/// In some cases there is a need to update the progress value when there is no access to the progress_indication object.
/// In this case it is added via context.
/// `write_progress_on_update` is needed to write progress for loading files data via pipe in non-interactive mode.
void setFileProgressCallback(ContextMutablePtr context, bool write_progress_on_update = false);
void setFileProgressCallback(ContextMutablePtr context, WriteBufferFromFileDescriptor & message);
/// How much seconds passed since query execution start.
double elapsedSeconds() const { return getElapsedNanoseconds() / 1e9; }

View File

@ -3,10 +3,10 @@
#include <cstring>
#include <iostream>
#include <Core/Defines.h>
#include <Common/Stopwatch.h>
#include <Common/TargetSpecific.h>
#include <base/types.h>
#include <base/unaligned.h>
#include <Common/Stopwatch.h>
#include <Common/TargetSpecific.h>
#ifdef __SSE2__
#include <emmintrin.h>
@ -599,6 +599,9 @@ bool NO_INLINE decompressImpl(const char * const source, char * const dest, size
copy_end = op + length;
if (unlikely(copy_end > output_end))
return false;
/** Here we can write up to copy_amount - 1 - 4 * 2 bytes after the buffer.
* The worst case is when offset = 1 and length = 4.
*/

View File

@ -1,8 +1,5 @@
#include <Compression/CompressionFactory.h>
#include <Common/PODArray.h>
#include <Common/Stopwatch.h>
#include <base/types.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadBufferFromMemory.h>
@ -10,6 +7,12 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <base/types.h>
#include <Common/PODArray.h>
#include <Common/Stopwatch.h>
#include <Compression/LZ4_decompress_faster.h>
#include <IO/BufferWithOwnMemory.h>
#include <random>
#include <bitset>
@ -1319,4 +1322,34 @@ INSTANTIATE_TEST_SUITE_P(Gorilla,
// ),
//);
TEST(LZ4Test, DecompressMalformedInput)
{
/// This malformed input was initially found by lz4_decompress_fuzzer and causes failure under UBSAN.
constexpr unsigned char data[]
= {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
0x00, 0x20, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0xff, 0xff, 0xff, 0x17, 0xff, 0xff, 0x0f, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xfe, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const char * const source = reinterpret_cast<const char * const>(data);
const uint32_t source_size = std::size(data);
constexpr uint32_t uncompressed_size = 80;
DB::Memory<> memory;
memory.resize(ICompressionCodec::getHeaderSize() + uncompressed_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
unalignedStoreLE<uint8_t>(memory.data(), static_cast<uint8_t>(CompressionMethodByte::LZ4));
unalignedStoreLE<uint32_t>(&memory[1], source_size);
unalignedStoreLE<uint32_t>(&memory[5], uncompressed_size);
auto codec = CompressionCodecFactory::instance().get("LZ4", {});
ASSERT_THROW(codec->decompress(source, source_size, memory.data()), Exception);
}
}
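
For orientation, the 9-byte header that the test assembles by hand can be sketched as below. The layout (one codec method byte plus two little-endian 32-bit sizes) is inferred from the unalignedStoreLE calls above, and the concrete byte values are placeholders.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Header layout inferred from the unalignedStoreLE calls above:
// 1 byte codec method + 4 bytes compressed size + 4 bytes uncompressed size.
static std::vector<uint8_t> makeCompressedBlockHeader(uint8_t method, uint32_t compressed_size, uint32_t uncompressed_size)
{
    std::vector<uint8_t> header(9);
    header[0] = method;
    std::memcpy(&header[1], &compressed_size, sizeof(compressed_size));     // assumes a little-endian host
    std::memcpy(&header[5], &uncompressed_size, sizeof(uncompressed_size)); // assumes a little-endian host
    return header;
}

int main()
{
    auto header = makeCompressedBlockHeader(/* placeholder method byte */ 0x82, /* compressed */ 231, /* uncompressed */ 80);
    for (uint8_t byte : header)
        std::printf("%02x ", byte);
    std::printf("\n");
}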

View File

@ -377,6 +377,9 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
{
assert(deltas.empty() || deltas.front().zxid >= commit_zxid);
// collect nodes that have no further modification in the current transaction
std::unordered_set<std::string> modified_nodes;
while (!deltas.empty() && deltas.front().zxid == commit_zxid)
{
if (std::holds_alternative<SubDeltaEnd>(deltas.front().operation))
@ -393,7 +396,17 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
assert(path_deltas.front() == &front_delta);
path_deltas.pop_front();
if (path_deltas.empty())
{
deltas_for_path.erase(front_delta.path);
// no more deltas for path -> no modification
modified_nodes.insert(std::move(front_delta.path));
}
else if (path_deltas.front()->zxid > commit_zxid)
{
// next delta has a zxid from a different transaction -> no modification in this transaction
modified_nodes.insert(std::move(front_delta.path));
}
}
else if (auto * add_auth = std::get_if<AddAuthDelta>(&front_delta.operation))
{
@ -409,9 +422,12 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid)
}
// delete all cached nodes that were not modified after the commit_zxid
// the commit can end on SubDeltaEnd so we don't want to clear cached nodes too soon
if (deltas.empty() || deltas.front().zxid > commit_zxid)
std::erase_if(nodes, [commit_zxid](const auto & node) { return node.second.zxid == commit_zxid; });
// we only need to check the nodes that were modified in this transaction
for (const auto & node : modified_nodes)
{
if (nodes[node].zxid == commit_zxid)
nodes.erase(node);
}
}
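
A toy standalone illustration of the data-structure change: only the paths collected in modified_nodes are checked and erased, instead of scanning the whole cache with erase_if. The paths, zxids and container contents below are made up.

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

int main()
{
    std::unordered_map<std::string, int64_t> nodes                  // path -> zxid of the cached value
        = {{"/a", 10}, {"/b", 10}, {"/c", 11}};
    std::unordered_set<std::string> modified_nodes = {"/a", "/c"};  // paths with no further deltas in this transaction
    const int64_t commit_zxid = 10;

    // Only the collected paths are checked, instead of a full scan of the cache.
    for (const auto & path : modified_nodes)
    {
        auto it = nodes.find(path);
        if (it != nodes.end() && it->second == commit_zxid)
            nodes.erase(it);
    }

    for (const auto & [path, zxid] : nodes)
        std::cout << path << " -> " << zxid << '\n';   // "/b" and "/c" remain cached
}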
void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid)

View File

@ -84,11 +84,12 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr)
void SerializationString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnString & column_string = typeid_cast<const ColumnString &>(column);
const auto & full_column = column.convertToFullColumnIfLowCardinality();
const ColumnString & column_string = typeid_cast<const ColumnString &>(*full_column);
const ColumnString::Chars & data = column_string.getChars();
const ColumnString::Offsets & offsets = column_string.getOffsets();
size_t size = column.size();
size_t size = column_string.size();
if (!size)
return;

View File

@ -62,11 +62,12 @@ std::pair<String, StoragePtr> createTableFromAST(
if (ast_create_query.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
auto table_function = factory.get(ast_create_query.as_table_function, context);
auto table_function_ast = ast_create_query.as_table_function->ptr();
auto table_function = factory.get(table_function_ast, context);
ColumnsDescription columns;
if (ast_create_query.columns_list && ast_create_query.columns_list->columns)
columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, true);
StoragePtr storage = table_function->execute(ast_create_query.as_table_function, context, ast_create_query.getTable(), std::move(columns));
StoragePtr storage = table_function->execute(table_function_ast, context, ast_create_query.getTable(), std::move(columns));
storage->renameInMemory(ast_create_query);
return {ast_create_query.getTable(), storage};
}

View File

@ -443,6 +443,11 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c
if (which.isArray())
return makeASTFunction("Array", getColumnDeclaration(typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType()));
if (which.isDateTime64())
{
return makeASTFunction("DateTime64", std::make_shared<ASTLiteral>(static_cast<UInt32>(6)));
}
return std::make_shared<ASTIdentifier>(data_type->getName());
}

View File

@ -87,7 +87,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_
parser,
statement_def.data(),
statement_def.data() + statement_def.size(),
"in file " + filepath + " from backup " + backup->getName(),
"in file " + filepath + " from backup " + backup->getNameForLogging(),
0,
context->getSettingsRef().max_parser_depth);
break;

View File

@ -0,0 +1,35 @@
#include <Functions/FunctionFactory.h>
#include <Functions/formatReadable.h>
namespace DB
{
namespace
{
struct Impl
{
static constexpr auto name = "formatReadableDecimalSize";
static void format(double value, DB::WriteBuffer & out)
{
formatReadableSizeWithDecimalSuffix(value, out);
}
};
}
REGISTER_FUNCTION(FormatReadableDecimalSize)
{
factory.registerFunction<FunctionFormatReadable<Impl>>(
{
R"(
Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
)",
Documentation::Examples{
{"formatReadableDecimalSize", "SELECT formatReadableDecimalSize(1000)"}},
Documentation::Categories{"OtherFunctions"}
},
FunctionFactory::CaseSensitive);
}
}
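
A rough standalone approximation of what a decimal-suffix formatter does; the suffix list, the rounding and the cut-off are assumptions, and the real work is done by formatReadableSizeWithDecimalSuffix in the ClickHouse codebase.

#include <cstdio>

// Decimal (base 1000) suffixes with two decimal places.
static void printReadableDecimalSize(double value)
{
    const char * units[] = {"B", "KB", "MB", "GB", "TB", "PB"};
    int unit = 0;
    while (value >= 1000.0 && unit < 5)
    {
        value /= 1000.0;
        ++unit;
    }
    std::printf("%.2f %s\n", value, units[unit]);
}

int main()
{
    printReadableDecimalSize(1000);          // 1.00 KB
    printReadableDecimalSize(1024 * 1024);   // 1.05 MB
}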

View File

@ -2637,7 +2637,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const
{
if (data_variants.empty())
throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED);
throw Exception("Empty data passed to Aggregator::prepareVariantsToMerge.", ErrorCodes::EMPTY_DATA_PASSED);
LOG_TRACE(log, "Merging aggregated data");

View File

@ -8,12 +8,14 @@
#include <IO/ReadBufferFromString.h>
#include <Poco/Net/NetException.h>
#include <Common/logger_useful.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTQueryWithTableAndOutput.h>
#include <Databases/DatabaseReplicated.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
namespace DB
@ -168,6 +170,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context)
query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth);
}
void DDLTaskBase::formatRewrittenQuery(ContextPtr context)
{
/// Convert rewritten AST back to string.
query_str = queryToString(*query);
query_for_logging = maskSensitiveInfoInQueryForLogging(query_str, query, context);
}
ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/)
{
auto query_context = Context::createCopy(from_context);
@ -265,6 +274,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log)
host_id.readableString(), entry_name, address_in_cluster.readableString(), cluster_name);
}
/// Rewrite AST without ON CLUSTER.
WithoutOnClusterASTRewriteParams params;
params.default_database = address_in_cluster.default_database;
params.host_id = address_in_cluster.toString();
@ -405,6 +415,7 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context)
chassert(!ddl_query->database);
ddl_query->setDatabase(database->getDatabaseName());
}
formatRewrittenQuery(context);
}
ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper)

View File

@ -99,6 +99,9 @@ struct DDLTaskBase
String host_id_str;
ASTPtr query;
String query_str;
String query_for_logging;
bool is_initial_query = false;
bool is_circular_replicated = false;
bool execute_on_leader = false;
@ -114,6 +117,7 @@ struct DDLTaskBase
virtual ~DDLTaskBase() = default;
virtual void parseQueryFromEntry(ContextPtr context);
void formatRewrittenQuery(ContextPtr context);
virtual String getShardID() const = 0;

View File

@ -10,8 +10,6 @@
#include <Parsers/ASTCreateIndexQuery.h>
#include <Parsers/ASTDropIndexQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
@ -207,6 +205,8 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
task->parseQueryFromEntry(context);
/// Stage 3.2: check cluster and find the host in cluster
task->setClusterInfo(context, log);
/// Stage 3.3: output rewritten query back to string
task->formatRewrittenQuery(context);
}
catch (...)
{
@ -431,11 +431,12 @@ DDLTaskBase & DDLWorker::saveTask(DDLTaskPtr && task)
return *current_tasks.back();
}
bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
/// Add special comment at the start of query to easily identify DDL-produced queries in query_log
String query_prefix = "/* ddl_entry=" + task.entry_name + " */ ";
String query_to_execute = query_prefix + query;
String query_to_execute = query_prefix + task.query_str;
String query_to_show_in_logs = query_prefix + task.query_for_logging;
ReadBufferFromString istr(query_to_execute);
String dummy_string;
@ -463,7 +464,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We use return value of tryExecuteQuery(...) in tryExecuteQueryOnLeaderReplica(...) to determine
/// if replica has stopped being leader and we should retry query.
@ -484,7 +485,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
throw;
task.execution_status = ExecutionStatus::fromCurrentException();
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
tryLogCurrentException(log, "Query " + query_to_show_in_logs + " wasn't finished successfully");
/// We don't know what exactly happened, but maybe it's Poco::NetException or std::bad_alloc,
/// so we consider unknown exception as retryable error.
@ -492,7 +493,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
}
task.execution_status = ExecutionStatus(0);
LOG_DEBUG(log, "Executed query: {}", query);
LOG_DEBUG(log, "Executed query: {}", query_to_show_in_logs);
return true;
}
@ -514,7 +515,7 @@ void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
{
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.query_for_logging);
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
@ -587,8 +588,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
try
{
String rewritten_query = queryToString(task.query);
LOG_DEBUG(log, "Executing query: {}", rewritten_query);
LOG_DEBUG(log, "Executing query: {}", task.query_for_logging);
StoragePtr storage;
if (auto * query_with_table = dynamic_cast<ASTQueryWithTableAndOutput *>(task.query.get()); query_with_table)
@ -605,12 +605,12 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
if (task.execute_on_leader)
{
tryExecuteQueryOnLeaderReplica(task, storage, rewritten_query, task.entry_path, zookeeper, execute_on_leader_lock);
tryExecuteQueryOnLeaderReplica(task, storage, task.entry_path, zookeeper, execute_on_leader_lock);
}
else
{
storage.reset();
tryExecuteQuery(rewritten_query, task, zookeeper);
tryExecuteQuery(task, zookeeper);
}
}
catch (const Coordination::Exception &)
@ -694,7 +694,6 @@ bool DDLWorker::taskShouldBeExecutedOnLeader(const ASTPtr & ast_ddl, const Stora
bool DDLWorker::tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
const String & rewritten_query,
const String & /*node_path*/,
const ZooKeeperPtr & zookeeper,
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock)
@ -793,7 +792,7 @@ bool DDLWorker::tryExecuteQueryOnLeaderReplica(
/// If the leader unexpectedly changes, this method will return false
/// and on the next iteration the new leader will take the lock
if (tryExecuteQuery(rewritten_query, task, zookeeper))
if (tryExecuteQuery(task, zookeeper))
{
executed_by_us = true;
break;

View File

@ -101,12 +101,11 @@ protected:
bool tryExecuteQueryOnLeaderReplica(
DDLTaskBase & task,
StoragePtr storage,
const String & rewritten_query,
const String & node_path,
const ZooKeeperPtr & zookeeper,
std::unique_ptr<zkutil::ZooKeeperLock> & execute_on_leader_lock);
bool tryExecuteQuery(const String & query, DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
bool tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeeper);
/// Checks and cleanups queue's nodes
void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);

View File

@ -25,13 +25,47 @@ public:
{}
void visit(T & ast)
{
if (ostr)
visitImpl</* with_dump= */ true>(ast);
else
visitImpl</* with_dump= */ false>(ast);
}
private:
Data & data;
size_t visit_depth;
WriteBuffer * ostr;
template <bool with_dump>
void visitImpl(T & ast)
{
checkStackSize();
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
if constexpr (with_dump)
{
DumpASTNode dump(*ast, ostr, visit_depth, typeid(Matcher).name());
visitImplMain</* with_dump= */ true>(ast);
}
else
{
visitImplMain</* with_dump= */ false>(ast);
}
}
template <bool with_dump>
void visitImplMain(T & ast)
{
if constexpr (!_top_to_bottom)
visitChildren(ast);
visitChildren<with_dump>(ast);
doVisit(ast);
if constexpr (_top_to_bottom)
visitChildren<with_dump>(ast);
}
void doVisit(T & ast)
{
try
{
Matcher::visit(ast, data);
@ -41,16 +75,9 @@ public:
e.addMessage("While processing {}", ast->formatForErrorMessage());
throw;
}
if constexpr (_top_to_bottom)
visitChildren(ast);
}
private:
Data & data;
size_t visit_depth;
WriteBuffer * ostr;
template <bool with_dump>
void visitChildren(T & ast)
{
for (auto & child : ast->children)
@ -62,7 +89,7 @@ private:
need_visit_child = Matcher::needChildVisit(ast, child);
if (need_visit_child)
visit(child);
visitImpl<with_dump>(child);
}
}
};
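
The refactoring above is an instance of a common pattern: a flag known once at the top of the traversal is promoted to a template parameter, so if constexpr removes the per-node branch. A minimal self-contained sketch with illustrative names:

#include <iostream>

// The runtime flag is checked once and propagated as a template parameter,
// so the per-node branch is resolved at compile time.
template <bool with_dump>
void visitNode(int node)
{
    if constexpr (with_dump)
        std::cout << "visiting " << node << '\n';
    // ... the actual work on the node goes here ...
}

void visit(int node, bool dump_requested)
{
    if (dump_requested)
        visitNode<true>(node);
    else
        visitNode<false>(node);
}

int main()
{
    visit(42, true);
    visit(43, false);
}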

View File

@ -726,7 +726,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
else if (create.as_table_function)
{
/// Table function without columns list.
auto table_function = TableFunctionFactory::instance().get(create.as_table_function, getContext());
auto table_function_ast = create.as_table_function->ptr();
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
properties.columns = table_function->getActualTableStructure(getContext());
}
else if (create.is_dictionary)
@ -967,7 +968,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (as_create.storage)
create.set(create.storage, as_create.storage->ptr());
else if (as_create.as_table_function)
create.as_table_function = as_create.as_table_function->clone();
create.set(create.as_table_function, as_create.as_table_function->ptr());
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug.");
@ -1343,12 +1344,12 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// NOTE: CREATE query may be rewritten by Storage creator or table function
if (create.as_table_function)
{
const auto & factory = TableFunctionFactory::instance();
auto table_func = factory.get(create.as_table_function, getContext());
auto table_function_ast = create.as_table_function->ptr();
auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext());
/// In case of CREATE AS table_function() query we should use global context
/// in storage creation because there will be no query context on server startup
/// and because storage lifetime is bigger than query context lifetime.
res = table_func->execute(create.as_table_function, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
res = table_function->execute(table_function_ast, getContext(), create.getTable(), properties.columns, /*use_global_context=*/true);
res->renameInMemory({create.getDatabase(), create.getTable(), create.uuid});
}
else

View File

@ -546,10 +546,13 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
{
auto alias = aliases.find(ident->name())->second;
auto alias_ident = alias->clone();
alias_ident->as<ASTIdentifier>()->restoreTable();
bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
if (!alias_equals_column_name)
throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
if (auto * alias_ident_typed = alias_ident->as<ASTIdentifier>())
{
alias_ident_typed->restoreTable();
bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias();
if (!alias_equals_column_name)
throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
}
}
String short_name = ident->shortName();
String original_long_name;

View File

@ -34,7 +34,6 @@
#include <Parsers/queryToString.h>
#include <Parsers/formatAST.h>
#include <Parsers/toOneLineQuery.h>
#include <Parsers/wipePasswordFromQuery.h>
#include <Formats/FormatFactory.h>
#include <Storages/StorageInput.h>
@ -56,9 +55,9 @@
#include <Interpreters/SelectQueryOptions.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
#include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h>
#include <IO/CompressionMethod.h>
#include <Processors/Transforms/LimitsCheckingTransform.h>
@ -77,7 +76,6 @@
namespace ProfileEvents
{
extern const Event QueryMaskingRulesMatch;
extern const Event FailedQuery;
extern const Event FailedInsertQuery;
extern const Event FailedSelectQuery;
@ -109,37 +107,6 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
}
/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
/// The parameter `parsed query` can be nullptr if the query cannot be parsed.
static String prepareQueryForLogging(const String & query, const ASTPtr & parsed_query, ContextPtr context)
{
String res = query;
// Wiping a password or hash from CREATE/ALTER USER query because we don't want it to go to logs.
if (parsed_query && canContainPassword(*parsed_query))
{
ASTPtr ast_for_logging = parsed_query->clone();
wipePasswordFromQuery(ast_for_logging);
res = serializeAST(*ast_for_logging);
}
// Wiping sensitive data before cropping query by log_queries_cut_to_length,
// otherwise something like credit card without last digit can go to log.
if (auto * masker = SensitiveDataMasker::getInstance())
{
auto matches = masker->wipeSensitiveData(res);
if (matches > 0)
{
ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
}
}
res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
return res;
}
/// Log query into text log (not into system table).
static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage)
{
@ -425,14 +392,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// MUST go before any modification (except for prepared statements,
/// since it substitute parameters and without them query does not contain
/// parameters), to keep query as-is in query_log and server log.
query_for_logging = prepareQueryForLogging(query, ast, context);
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
}
catch (...)
{
/// Anyway log the query.
if (query.empty())
query.assign(begin, std::min(end - begin, static_cast<ptrdiff_t>(max_query_size)));
query_for_logging = prepareQueryForLogging(query, ast, context);
query_for_logging = maskSensitiveInfoInQueryForLogging(query, ast, context);
logQuery(query_for_logging, context, internal, stage);

View File

@ -0,0 +1,623 @@
#include <Interpreters/maskSensitiveInfoInQueryForLogging.h>
#include <Formats/FormatFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTBackupQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/formatAST.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/ProfileEvents.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/typeid_cast.h>
namespace ProfileEvents
{
extern const Event QueryMaskingRulesMatch;
}
namespace DB
{
namespace
{
enum class PasswordWipingMode
{
Query,
BackupName,
};
template <bool check_only>
class PasswordWipingVisitor
{
public:
struct Data
{
bool can_contain_password = false;
bool password_was_hidden = false;
bool is_create_table_query = false;
bool is_create_database_query = false;
bool is_create_dictionary_query = false;
ContextPtr context;
PasswordWipingMode mode = PasswordWipingMode::Query;
};
using Visitor = std::conditional_t<
check_only,
ConstInDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>,
InDepthNodeVisitor<PasswordWipingVisitor, /* top_to_bottom= */ true, /* need_child_accept_data= */ true>>;
static bool needChildVisit(const ASTPtr & /* ast */, const ASTPtr & /* child */, Data & data)
{
if constexpr (check_only)
{
return !data.can_contain_password;
}
else
{
return true;
}
}
static void visit(ASTPtr ast, Data & data)
{
if (auto * create_user_query = ast->as<ASTCreateUserQuery>())
{
visitCreateUserQuery(*create_user_query, data);
}
else if (auto * create_query = ast->as<ASTCreateQuery>())
{
visitCreateQuery(*create_query, data);
}
else if (auto * backup_query = ast->as<ASTBackupQuery>())
{
visitBackupQuery(*backup_query, data);
}
else if (auto * storage = ast->as<ASTStorage>())
{
if (data.is_create_table_query)
visitTableEngine(*storage, data);
else if (data.is_create_database_query)
visitDatabaseEngine(*storage, data);
}
else if (auto * dictionary = ast->as<ASTDictionary>())
{
if (data.is_create_dictionary_query)
visitDictionaryDef(*dictionary, data);
}
else if (auto * function = ast->as<ASTFunction>())
{
if (data.mode == PasswordWipingMode::BackupName)
wipePasswordFromBackupEngineArguments(*function, data);
else
visitFunction(*function, data);
}
}
private:
static void visitCreateUserQuery(ASTCreateUserQuery & query, Data & data)
{
if (!query.auth_data)
return;
auto auth_type = query.auth_data->getType();
if (auth_type == AuthenticationType::NO_PASSWORD || auth_type == AuthenticationType::LDAP
|| auth_type == AuthenticationType::KERBEROS || auth_type == AuthenticationType::SSL_CERTIFICATE)
return; /// No password, nothing to hide.
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
query.show_password = false;
data.password_was_hidden = true;
}
static void visitCreateQuery(ASTCreateQuery & query, Data & data)
{
if (query.is_dictionary)
data.is_create_dictionary_query = true;
else if (query.table)
data.is_create_table_query = true;
else if (query.database)
data.is_create_database_query = true;
}
static void visitTableEngine(ASTStorage & storage, Data & data)
{
if (!storage.engine)
return;
const String & engine_name = storage.engine->name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
wipePasswordFromArgument(*storage.engine, data, 5);
}
else if (engine_name == "MySQL")
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "PostgreSQL")
{
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "MaterializedPostgreSQL")
{
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "MongoDB")
{
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "S3" || engine_name == "COSN")
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
wipePasswordFromS3TableEngineArguments(*storage.engine, data);
}
}
static void wipePasswordFromS3TableEngineArguments(ASTFunction & engine, Data & data)
{
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
/// But we should check the number of arguments first because we don't need to do that replacement in the case of
/// S3('url' [, 'format' [, 'compression']])
size_t num_arguments;
if (!tryGetNumArguments(engine, &num_arguments) || (num_arguments < 4))
return;
wipePasswordFromArgument(engine, data, 2);
}
static void visitDatabaseEngine(ASTStorage & storage, Data & data)
{
if (!storage.engine)
return;
const String & engine_name = storage.engine->name;
if (engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL")
{
/// MySQL('host:port', 'database', 'user', 'password')
wipePasswordFromArgument(*storage.engine, data, 3);
}
else if (engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL")
{
/// PostgreSQL('host:port', 'database', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 3);
}
}
static void visitFunction(ASTFunction & function, Data & data)
{
if (function.name == "mysql")
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "postgresql")
{
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "mongodb")
{
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "s3" || function.name == "cosn")
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ true);
}
else if (function.name == "remote" || function.name == "remoteSecure")
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
wipePasswordFromRemoteFunctionArguments(function, data);
}
else if (
function.name == "encrypt" || function.name == "decrypt" || function.name == "aes_encrypt_mysql"
|| function.name == "aes_decrypt_mysql" || function.name == "tryDecrypt")
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
wipePasswordFromEncryptionFunctionArguments(function, data);
}
}
static void wipePasswordFromS3FunctionArguments(ASTFunction & function, Data & data, bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
/// But we should check the number of arguments first because we don't need to do any replacements in the case of
/// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format'])
size_t num_arguments;
if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < url_arg_idx + 3))
return;
if (num_arguments >= url_arg_idx + 5)
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...)
wipePasswordFromArgument(function, data, url_arg_idx + 2);
}
else
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// We need to distinguish that from s3('url', 'format', 'structure' [, 'compression_method']).
/// So we will check whether the argument after 'url' is a format.
String format;
if (!tryGetEvaluatedConstStringFromArgument(function, data, url_arg_idx + 1, &format))
return;
if (FormatFactory::instance().getAllFormats().contains(format))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
/// The argument after 'url' is not a format so we do our replacement:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...)
wipePasswordFromArgument(function, data, url_arg_idx + 2);
}
}
static void wipePasswordFromRemoteFunctionArguments(ASTFunction & function, Data & data)
{
/// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in the case of
/// remote('addresses_expr', db.table)
size_t num_arguments;
if (!tryGetNumArguments(function, &num_arguments) || (num_arguments < 3))
return;
auto & arguments = assert_cast<ASTExpressionList &>(*function.arguments).children;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments[arg_num]->as<ASTFunction>();
if (table_function && TableFunctionFactory::instance().isTableFunctionName(table_function->name))
{
++arg_num;
}
else
{
String database;
if (!tryGetEvaluatedConstDatabaseNameFromArgument(function, data, arg_num, &database))
return;
++arg_num;
auto qualified_name = QualifiedTableName::parseFromString(database);
if (qualified_name.database.empty())
++arg_num; /// skip 'table' argument
}
/// Check whether a username and password are specified
/// (sharding_key can be of any type, so as long as we keep getting string literals they are the username and password).
String username, password;
bool username_specified = tryGetStringFromArgument(function, arg_num, &username);
bool password_specified = username_specified && tryGetStringFromArgument(function, arg_num + 1, &password);
if (password_specified)
{
/// Password is specified so we do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
wipePasswordFromArgument(function, data, arg_num + 1);
}
}
static void wipePasswordFromEncryptionFunctionArguments(ASTFunction & function, Data & data)
{
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
wipePasswordFromArgument(function, data, 1);
removeArgumentsAfter(function, data, 2);
}
static void visitBackupQuery(ASTBackupQuery & query, Data & data)
{
if (query.backup_name)
{
if (auto * backup_engine = query.backup_name->as<ASTFunction>())
wipePasswordFromBackupEngineArguments(*backup_engine, data);
}
if (query.base_backup_name)
{
if (auto * base_backup_engine = query.base_backup_name->as<ASTFunction>())
wipePasswordFromBackupEngineArguments(*base_backup_engine, data);
}
}
static void wipePasswordFromBackupEngineArguments(ASTFunction & engine, Data & data)
{
if (engine.name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
wipePasswordFromArgument(engine, data, 2);
}
}
static void wipePasswordFromArgument(ASTFunction & function, Data & data, size_t arg_idx)
{
if (!function.arguments)
return;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return;
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
arguments[arg_idx] = std::make_shared<ASTLiteral>("[HIDDEN]");
data.password_was_hidden = true;
}
static void removeArgumentsAfter(ASTFunction & function, Data & data, size_t new_num_arguments)
{
if (!function.arguments)
return;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
auto & arguments = expr_list->children;
if (new_num_arguments >= arguments.size())
return;
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
arguments.resize(new_num_arguments);
data.password_was_hidden = true;
}
static bool tryGetNumArguments(const ASTFunction & function, size_t * num_arguments)
{
if (!function.arguments)
return false;
auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
*num_arguments = arguments.size();
return true;
}
static bool tryGetStringFromArgument(const ASTFunction & function, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
const auto * literal = arguments[arg_idx]->as<ASTLiteral>();
if (!literal || literal->value.getType() != Field::Types::String)
return false;
*value = literal->value.safeGet<String>();
return true;
}
static bool tryGetEvaluatedConstStringFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
if constexpr (check_only)
{
data.can_contain_password = true;
return false;
}
ASTPtr argument = arguments[arg_idx];
try
{
argument = evaluateConstantExpressionOrIdentifierAsLiteral(argument, data.context);
}
catch (...)
{
return false;
}
const auto & literal = assert_cast<const ASTLiteral &>(*argument);
if (literal.value.getType() != Field::Types::String)
return false;
*value = literal.value.safeGet<String>();
return true;
}
static bool tryGetEvaluatedConstDatabaseNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx, String * value)
{
if (!function.arguments)
return false;
const auto * expr_list = function.arguments->as<ASTExpressionList>();
if (!expr_list)
return false; /// return false because we don't want to validate query here
const auto & arguments = expr_list->children;
if (arg_idx >= arguments.size())
return false;
if constexpr (check_only)
{
data.can_contain_password = true;
return false;
}
ASTPtr argument = arguments[arg_idx];
try
{
argument = evaluateConstantExpressionForDatabaseName(argument, data.context);
}
catch (...)
{
return false;
}
const auto & literal = assert_cast<const ASTLiteral &>(*argument);
if (literal.value.getType() != Field::Types::String)
return false;
*value = literal.value.safeGet<String>();
return true;
}
static void visitDictionaryDef(ASTDictionary & dictionary, Data & data)
{
if (!dictionary.source || !dictionary.source->elements)
return;
const auto * expr_list = dictionary.source->elements->as<ASTExpressionList>();
if (!expr_list)
return; /// return because we don't want to validate query here
const auto & elements = expr_list->children;
/// We replace password in the dictionary's definition:
/// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password 'qwe123' db 'default' table 'ids')) ->
/// SOURCE(CLICKHOUSE(host 'example01-01-1' port 9000 user 'default' password '[HIDDEN]' db 'default' table 'ids'))
for (const auto & element : elements)
{
auto * pair = element->as<ASTPair>();
if (!pair)
continue; /// just skip because we don't want to validate query here
if (pair->first == "password")
{
if constexpr (check_only)
{
data.can_contain_password = true;
return;
}
pair->set(pair->second, std::make_shared<ASTLiteral>("[HIDDEN]"));
data.password_was_hidden = true;
}
}
}
};
/// Checks the type of a specified AST and returns true if it can contain a password.
bool canContainPassword(const IAST & ast, PasswordWipingMode mode)
{
using WipingVisitor = PasswordWipingVisitor</*check_only= */ true>;
WipingVisitor::Data data;
data.mode = mode;
WipingVisitor::Visitor visitor{data};
ASTPtr ast_ptr = std::const_pointer_cast<IAST>(ast.shared_from_this());
visitor.visit(ast_ptr);
return data.can_contain_password;
}
/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder.
/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
bool wipePasswordFromQuery(ASTPtr ast, PasswordWipingMode mode, const ContextPtr & context)
{
using WipingVisitor = PasswordWipingVisitor</*check_only= */ false>;
WipingVisitor::Data data;
data.context = context;
data.mode = mode;
WipingVisitor::Visitor visitor{data};
visitor.visit(ast);
return data.password_was_hidden;
}
/// Common utility for masking sensitive information.
String maskSensitiveInfoImpl(const String & query, const ASTPtr & parsed_query, PasswordWipingMode mode, const ContextPtr & context)
{
String res = query;
// Wiping a password or hash from the query because we don't want it to go to logs.
if (parsed_query && canContainPassword(*parsed_query, mode))
{
ASTPtr ast_without_password = parsed_query->clone();
if (wipePasswordFromQuery(ast_without_password, mode, context))
res = serializeAST(*ast_without_password);
}
// Wiping sensitive data before cropping query by log_queries_cut_to_length,
// otherwise something like credit card without last digit can go to log.
if (auto * masker = SensitiveDataMasker::getInstance())
{
auto matches = masker->wipeSensitiveData(res);
if (matches > 0)
{
ProfileEvents::increment(ProfileEvents::QueryMaskingRulesMatch, matches);
}
}
res = res.substr(0, context->getSettingsRef().log_queries_cut_to_length);
return res;
}
}
String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context)
{
return maskSensitiveInfoImpl(query, parsed_query, PasswordWipingMode::Query, context);
}
String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context)
{
return maskSensitiveInfoImpl(backup_name, ast, PasswordWipingMode::BackupName, context);
}
}
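
To make the intent concrete, here is a deliberately simplified, string-based toy of the masking idea. The real code rewrites the AST as shown above; the helper below is purely illustrative.

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Replace the argument at a known "password" position with a placeholder
// before the text ever reaches a log.
static std::vector<std::string> wipeArgument(std::vector<std::string> args, size_t password_idx)
{
    if (password_idx < args.size())
        args[password_idx] = "'[HIDDEN]'";
    return args;
}

int main()
{
    // mysql('host:port', 'database', 'table', 'user', 'password', ...) hides its fifth argument.
    std::vector<std::string> args = {"'127.0.0.1:3306'", "'db'", "'table'", "'user'", "'secret'"};
    args = wipeArgument(std::move(args), 4);

    std::string joined;
    for (size_t i = 0; i < args.size(); ++i)
        joined += (i ? ", " : "") + args[i];
    std::cout << "mysql(" << joined << ")\n";
}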

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Makes a version of a query without sensitive information (e.g. passwords) for logging.
/// The parameter `parsed_query` is allowed to be nullptr if the query cannot be parsed.
/// Does not validate the AST; works in a best-effort way.
String maskSensitiveInfoInQueryForLogging(const String & query, const ASTPtr & parsed_query, const ContextPtr & context);
/// Makes a version of a backup name without sensitive information (e.g. passwords) for logging.
/// Does not validate the AST; works in a best-effort way.
String maskSensitiveInfoInBackupNameForLogging(const String & backup_name, const ASTPtr & ast, const ContextPtr & context);
}

View File

@ -245,7 +245,21 @@ String ASTBackupQuery::getID(char) const
ASTPtr ASTBackupQuery::clone() const
{
return std::make_shared<ASTBackupQuery>(*this);
auto res = std::make_shared<ASTBackupQuery>(*this);
if (backup_name)
res->backup_name = backup_name->clone();
if (base_backup_name)
res->base_backup_name = base_backup_name->clone();
if (cluster_host_ids)
res->cluster_host_ids = cluster_host_ids->clone();
if (settings)
res->settings = settings->clone();
return res;
}

View File

@ -210,6 +210,8 @@ ASTPtr ASTCreateQuery::clone() const
res->set(res->dictionary, dictionary->clone());
}
if (as_table_function)
res->set(res->as_table_function, as_table_function->clone());
if (comment)
res->set(res->comment, comment->clone());

View File

@ -83,7 +83,7 @@ public:
ASTPtr lateness_function;
String as_database;
String as_table;
ASTPtr as_table_function;
IAST * as_table_function = nullptr;
ASTSelectWithUnionQuery * select = nullptr;
IAST * comment = nullptr;

View File

@ -141,7 +141,12 @@ String ASTCreateQuotaQuery::getID(char) const
ASTPtr ASTCreateQuotaQuery::clone() const
{
return std::make_shared<ASTCreateQuotaQuery>(*this);
auto res = std::make_shared<ASTCreateQuotaQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
return res;
}

View File

@ -42,7 +42,12 @@ String ASTCreateRoleQuery::getID(char) const
ASTPtr ASTCreateRoleQuery::clone() const
{
return std::make_shared<ASTCreateRoleQuery>(*this);
auto res = std::make_shared<ASTCreateRoleQuery>(*this);
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -124,7 +124,25 @@ String ASTCreateRowPolicyQuery::getID(char) const
ASTPtr ASTCreateRowPolicyQuery::clone() const
{
return std::make_shared<ASTCreateRowPolicyQuery>(*this);
auto res = std::make_shared<ASTCreateRowPolicyQuery>(*this);
if (names)
res->names = std::static_pointer_cast<ASTRowPolicyNames>(names->clone());
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
/// `res->filters` is already initialized by the copy constructor of ASTCreateRowPolicyQuery (see the first line of this function).
/// But the copy constructor just copied the pointers inside `filters` instead of cloning.
/// We need to make a deep copy and not a shallow copy, so we have to manually clone each pointer in `res->filters`.
chassert(res->filters.size() == filters.size());
for (auto & [_, res_filter] : res->filters)
{
if (res_filter)
res_filter = res_filter->clone();
}
return res;
}
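
The same shallow-versus-deep issue can be reproduced in a few lines outside the parser; the Node type below is invented for illustration.

#include <iostream>
#include <memory>
#include <vector>

struct Node
{
    int value = 0;
    std::vector<std::shared_ptr<Node>> children;

    std::shared_ptr<Node> clone() const
    {
        auto res = std::make_shared<Node>(*this);   // shallow: the child pointers are shared
        for (auto & child : res->children)
            if (child)
                child = child->clone();             // deep: replace them with cloned children
        return res;
    }
};

int main()
{
    auto original = std::make_shared<Node>();
    original->children.push_back(std::make_shared<Node>());

    auto copy = original->clone();
    copy->children[0]->value = 42;

    std::cout << original->children[0]->value << '\n';   // prints 0: no shared state with the clone
}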

View File

@ -49,7 +49,15 @@ String ASTCreateSettingsProfileQuery::getID(char) const
ASTPtr ASTCreateSettingsProfileQuery::clone() const
{
return std::make_shared<ASTCreateSettingsProfileQuery>(*this);
auto res = std::make_shared<ASTCreateSettingsProfileQuery>(*this);
if (to_roles)
res->to_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(to_roles->clone());
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -275,7 +275,24 @@ String ASTCreateUserQuery::getID(char) const
ASTPtr ASTCreateUserQuery::clone() const
{
return std::make_shared<ASTCreateUserQuery>(*this);
auto res = std::make_shared<ASTCreateUserQuery>(*this);
if (names)
res->names = std::static_pointer_cast<ASTUserNamesWithHost>(names->clone());
if (default_roles)
res->default_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(default_roles->clone());
if (default_database)
res->default_database = std::static_pointer_cast<ASTDatabaseOrNone>(default_database->clone());
if (grantees)
res->grantees = std::static_pointer_cast<ASTRolesOrUsersSet>(grantees->clone());
if (settings)
res->settings = std::static_pointer_cast<ASTSettingsProfileElements>(settings->clone());
return res;
}

View File

@ -29,7 +29,12 @@ String ASTDropAccessEntityQuery::getID(char) const
ASTPtr ASTDropAccessEntityQuery::clone() const
{
return std::make_shared<ASTDropAccessEntityQuery>(*this);
auto res = std::make_shared<ASTDropAccessEntityQuery>(*this);
if (row_policy_names)
res->row_policy_names = std::static_pointer_cast<ASTRowPolicyNames>(row_policy_names->clone());
return res;
}

View File

@ -96,7 +96,15 @@ String ASTGrantQuery::getID(char) const
ASTPtr ASTGrantQuery::clone() const
{
return std::make_shared<ASTGrantQuery>(*this);
auto res = std::make_shared<ASTGrantQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
if (grantees)
res->grantees = std::static_pointer_cast<ASTRolesOrUsersSet>(grantees->clone());
return res;
}

View File

@ -14,7 +14,15 @@ String ASTSetRoleQuery::getID(char) const
ASTPtr ASTSetRoleQuery::clone() const
{
return std::make_shared<ASTSetRoleQuery>(*this);
auto res = std::make_shared<ASTSetRoleQuery>(*this);
if (roles)
res->roles = std::static_pointer_cast<ASTRolesOrUsersSet>(roles->clone());
if (to_users)
res->to_users = std::static_pointer_cast<ASTRolesOrUsersSet>(to_users->clone());
return res;
}

View File

@ -38,7 +38,12 @@ String ASTShowCreateAccessEntityQuery::getID(char) const
ASTPtr ASTShowCreateAccessEntityQuery::clone() const
{
return std::make_shared<ASTShowCreateAccessEntityQuery>(*this);
auto res = std::make_shared<ASTShowCreateAccessEntityQuery>(*this);
if (row_policy_names)
res->row_policy_names = std::static_pointer_cast<ASTRowPolicyNames>(row_policy_names->clone());
return res;
}


@ -14,7 +14,12 @@ String ASTShowGrantsQuery::getID(char) const
ASTPtr ASTShowGrantsQuery::clone() const
{
return std::make_shared<ASTShowGrantsQuery>(*this);
auto res = std::make_shared<ASTShowGrantsQuery>(*this);
if (for_roles)
res->for_roles = std::static_pointer_cast<ASTRolesOrUsersSet>(for_roles->clone());
return res;
}


@ -1197,6 +1197,9 @@ public:
if (!mergeElement())
return false;
if (elements.size() != 2)
return false;
elements = {makeASTFunction("CAST", elements[0], elements[1])};
finished = true;
return true;
@ -1406,7 +1409,7 @@ public:
protected:
bool getResultImpl(ASTPtr & node) override
{
if (state == 2)
if (state == 2 && elements.size() == 2)
std::swap(elements[1], elements[0]);
node = makeASTFunction("position", std::move(elements));
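The added `elements.size() == 2` condition guards the swap: on malformed input the parser may have collected fewer than two operands, and swapping would then touch an element that does not exist. A tiny Python sketch of the same guard (plain lists standing in for the parsed elements; not the parser itself):

    def build_position(elements, state):
        # Swap needle/haystack only when both operands were actually parsed.
        if state == 2 and len(elements) == 2:
            elements[0], elements[1] = elements[1], elements[0]
        return ("position", elements)

    print(build_position(["needle", "haystack"], state=2))  # operands swapped
    print(build_position(["haystack"], state=2))             # left alone, no out-of-range access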


@ -640,9 +640,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
auto query = std::make_shared<ASTCreateQuery>();
node = query;
if (as_table_function)
query->as_table_function = as_table_function;
query->attach = attach;
query->replace_table = replace;
query->create_or_replace = or_replace;
@ -661,6 +658,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
query->set(query->as_table_function, as_table_function);
if (comment)
query->set(query->comment, comment);


@ -1,22 +0,0 @@
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/wipePasswordFromQuery.h>
#include <Common/typeid_cast.h>
namespace DB
{
bool canContainPassword(const IAST & ast)
{
return ast.as<ASTCreateUserQuery>();
}
void wipePasswordFromQuery(ASTPtr ast)
{
if (auto * create_query = ast->as<ASTCreateUserQuery>())
{
create_query->show_password = false;
}
}
}


@ -1,18 +0,0 @@
#pragma once
#include <Parsers/IAST_fwd.h>
namespace DB
{
/// Checks the type of a specified AST and returns true if it can contain a password.
bool canContainPassword(const IAST & ast);
/// Removes a password or its hash from a query if it's specified there or replaces it with some placeholder.
/// This function is used to prepare a query for storing in logs (we don't want logs to contain sensitive information).
/// The function changes only following types of queries:
/// CREATE/ALTER USER.
void wipePasswordFromQuery(ASTPtr ast);
}


@ -214,6 +214,14 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
/// Previous part only in boundaries of partition frame
const MergeTreeData::DataPartPtr * prev_part = nullptr;
/// collect min_age for each partition while iterating parts
struct PartitionInfo
{
time_t min_age{std::numeric_limits<time_t>::max()};
};
std::unordered_map<std::string, PartitionInfo> partitions_info;
size_t parts_selected_precondition = 0;
for (const MergeTreeData::DataPartPtr & part : data_parts)
{
@ -277,6 +285,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true;
auto & partition_info = partitions_info[partition_id];
partition_info.min_age = std::min(partition_info.min_age, part_info.age);
++parts_selected_precondition;
parts_ranges.back().emplace_back(part_info);
@ -333,7 +344,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
SimpleMergeSelector::Settings merge_settings;
/// Override value from table settings
merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once;
merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds;
if (!data_settings->min_age_to_force_merge_on_partition_only)
merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds;
if (aggressive)
merge_settings.base = 1;
@ -347,6 +359,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
if (parts_to_merge.empty())
{
if (data_settings->min_age_to_force_merge_on_partition_only && data_settings->min_age_to_force_merge_seconds)
{
auto best_partition_it = std::max_element(
partitions_info.begin(),
partitions_info.end(),
[](const auto & e1, const auto & e2) { return e1.second.min_age < e2.second.min_age; });
assert(best_partition_it != partitions_info.end());
if (static_cast<size_t>(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds)
return selectAllPartsToMergeWithinPartition(
future_part, can_merge_callback, best_partition_it->first, true, metadata_snapshot, txn, out_disable_reason);
}
if (out_disable_reason)
*out_disable_reason = "There is no need to merge parts according to merge selector algorithm";
return SelectPartsDecision::CANNOT_SELECT;
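The new branch runs only when the regular merge selector picked nothing: it finds the partition whose youngest part is the oldest and force-merges that whole partition once the age crosses the threshold. A minimal Python sketch of that selection rule, with made-up ages (not the C++ implementation):

    # partition_id -> minimal part age in seconds (hypothetical values)
    partitions_info = {"202210": 7200, "202211": 30}
    min_age_to_force_merge_seconds = 3600

    best_partition, min_age = max(partitions_info.items(), key=lambda kv: kv[1])
    if min_age >= min_age_to_force_merge_seconds:
        print(f"select all parts of partition {best_partition} for merging")
    else:
        print("There is no need to merge parts according to merge selector algorithm")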


@ -63,6 +63,7 @@ struct Settings;
M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \
M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they have remained untouched for the period of time specified by this setting.", 0) \
M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, the range will always be eligible for merging. Set to 0 to disable.", 0) \
M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only to the entire partition and not to a subset.", false) \
M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \
M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \
\
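For reference, a hedged usage sketch combining the new setting with the existing one, in the style of the integration tests elsewhere in this diff (the table name is made up, and the query string would normally be passed to a cluster node fixture):

    create_query = (
        "CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i "
        "SETTINGS min_age_to_force_merge_seconds = 5, "
        "min_age_to_force_merge_on_partition_only = true"
    )
    print(create_query)  # in an integration test this would be node.query(create_query)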


@ -1111,6 +1111,14 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
}
else
{
/// Supported signatures:
///
/// S3('url')
/// S3('url', 'format')
/// S3('url', 'format', 'compression')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (engine_args.empty() || engine_args.size() > 5)
throw Exception(
"Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",

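The comment enumerates the accepted argument patterns; the masking test added later in this diff exercises the same shapes. A short illustrative list of the corresponding DDL strings (the MinIO URL is taken from those tests, the credential is a placeholder):

    engine_clauses = [
        "S3('http://minio1:9001/root/data/test1.csv')",
        "S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
        "S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
        "S3('http://minio1:9001/root/data/test4.csv', 'minio', '<secret>', 'CSV')",
        "S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '<secret>', 'CSV', 'gzip')",
    ]
    for i, clause in enumerate(engine_clauses):
        print(f"CREATE TABLE table{i} (x int) ENGINE = {clause}")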

@ -94,6 +94,30 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
}
else
{
/// Supported signatures:
///
/// remote('addresses_expr', db.table)
/// remote('addresses_expr', 'db', 'table')
/// remote('addresses_expr', db.table, 'user')
/// remote('addresses_expr', 'db', 'table', 'user')
/// remote('addresses_expr', db.table, 'user', 'password')
/// remote('addresses_expr', 'db', 'table', 'user', 'password')
/// remote('addresses_expr', db.table, sharding_key)
/// remote('addresses_expr', 'db', 'table', sharding_key)
/// remote('addresses_expr', db.table, 'user', sharding_key)
/// remote('addresses_expr', 'db', 'table', 'user', sharding_key)
/// remote('addresses_expr', db.table, 'user', 'password', sharding_key)
/// remote('addresses_expr', 'db', 'table', 'user', 'password', sharding_key)
///
/// remoteSecure() - same as remote()
///
/// cluster('cluster_name', db.table)
/// cluster('cluster_name', 'db', 'table')
/// cluster('cluster_name', db.table, sharding_key)
/// cluster('cluster_name', 'db', 'table', sharding_key)
///
/// clusterAllReplicas() - same as cluster()
if (args.size() < 2 || args.size() > max_args)
throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
@ -318,7 +342,6 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_
is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]");
}
void registerTableFunctionRemote(TableFunctionFactory & factory)
{
factory.registerFunction("remote", [] () -> TableFunctionPtr { return std::make_shared<TableFunctionRemote>("remote"); });
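The signature comment above is long; a few representative forms, written out as query strings the way the table-function test in this diff does (addresses, table and user names are placeholders):

    table_functions = [
        "remote('127.{2..11}', default.remote_table)",
        "remote('127.{2..11}', 'default', 'remote_table', 'remote_user', '<secret>')",
        "remote('127.{2..11}', default.remote_table, 'remote_user', '<secret>', rand())",
        "cluster('test_shard_localhost', default.remote_table)",
        "clusterAllReplicas('test_shard_localhost', 'default', 'remote_table')",
    ]
    for expr in table_functions:
        print(f"SELECT count() FROM {expr}")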


@ -69,7 +69,7 @@ if __name__ == "__main__":
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_image = get_image_with_version(temp_path, IMAGE_NAME)
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
build_name = get_build_name_for_check(check_name)
print(build_name)


@ -355,6 +355,12 @@ CI_CONFIG = {
"required_build": "package_aarch64",
"test_grep_exclude_filter": "",
},
"SQLancer (release)": {
"required_build": "package_release",
},
"SQLancer (debug)": {
"required_build": "package_debug",
},
},
} # type: dict


@ -87,7 +87,7 @@ class PRInfo:
self.body = ""
self.diff_urls = []
self.release_pr = 0
ref = github_event.get("ref", "refs/head/master")
ref = github_event.get("ref", "refs/heads/master")
if ref and ref.startswith("refs/heads/"):
ref = ref[11:]
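The one-character fix matters because the startswith/slice pair above only strips a correctly spelled prefix ("refs/heads/" is exactly 11 characters). A quick check:

    for ref in ("refs/heads/master", "refs/head/master"):
        stripped = ref[11:] if ref.startswith("refs/heads/") else ref
        print(ref, "->", stripped)
    # refs/heads/master -> master
    # refs/head/master  -> refs/head/master   (the old, misspelled default slipped through unstripped)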

tests/ci/sqlancer_check.py (new file, 193 lines)

@ -0,0 +1,193 @@
#!/usr/bin/env python3
import logging
import subprocess
import os
import sys
from github import Github
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_RUN_URL,
REPORTS_PATH,
REPO_COPY,
TEMP_PATH,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import PRInfo
from build_download_helper import get_build_name_for_check, read_build_urls
from docker_pull_helper import get_image_with_version
from commit_status_helper import post_commit_status
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from upload_result_helper import upload_results
from stopwatch import Stopwatch
from rerun_helper import RerunHelper
IMAGE_NAME = "clickhouse/sqlancer-test"
def get_run_command(download_url, workspace_path, image):
return (
f"docker run "
# For sysctl
"--privileged "
"--network=host "
f"--volume={workspace_path}:/workspace "
"--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE "
f'-e BINARY_URL_TO_DOWNLOAD="{download_url}" '
f"{image}"
)
def get_commit(gh, commit_sha):
repo = gh.get_repo(GITHUB_REPOSITORY)
commit = repo.get_commit(commit_sha)
return commit
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
stopwatch = Stopwatch()
temp_path = TEMP_PATH
repo_path = REPO_COPY
reports_path = REPORTS_PATH
check_name = sys.argv[1]
if not os.path.exists(temp_path):
os.makedirs(temp_path)
pr_info = PRInfo()
gh = Github(get_best_robot_token(), per_page=100)
rerun_helper = RerunHelper(gh, pr_info, check_name)
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
build_name = get_build_name_for_check(check_name)
print(build_name)
urls = read_build_urls(build_name, reports_path)
if not urls:
raise Exception("No build URLs found")
for url in urls:
if url.endswith("/clickhouse"):
build_url = url
break
else:
raise Exception("Cannot find binary clickhouse among build results")
logging.info("Got build url %s", build_url)
workspace_path = os.path.join(temp_path, "workspace")
if not os.path.exists(workspace_path):
os.makedirs(workspace_path)
run_command = get_run_command(build_url, workspace_path, docker_image)
logging.info("Going to run %s", run_command)
run_log_path = os.path.join(workspace_path, "runlog.log")
with open(run_log_path, "w", encoding="utf-8") as log:
with subprocess.Popen(
run_command, shell=True, stderr=log, stdout=log
) as process:
retcode = process.wait()
if retcode == 0:
logging.info("Run successfully")
else:
logging.info("Run failed")
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
check_name_lower = (
check_name.lower().replace("(", "").replace(")", "").replace(" ", "")
)
s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_lower}/"
tests = [
"TLPGroupBy",
"TLPHaving",
"TLPWhere",
"TLPDistinct",
"TLPAggregate",
"NoREC",
]
paths = [
run_log_path,
os.path.join(workspace_path, "clickhouse-server.log"),
os.path.join(workspace_path, "stderr.log"),
os.path.join(workspace_path, "stdout.log"),
]
for t in tests:
err_name = f"{t}.err"
log_name = f"{t}.out"
paths.append(os.path.join(workspace_path, err_name))
paths.append(os.path.join(workspace_path, log_name))
s3_helper = S3Helper()
report_url = GITHUB_RUN_URL
status = "success"
test_results = []
# Try to get status message saved by the SQLancer
try:
# with open(
# os.path.join(workspace_path, "status.txt"), "r", encoding="utf-8"
# ) as status_f:
# status = status_f.readline().rstrip("\n")
if os.path.exists(os.path.join(workspace_path, "server_crashed.log")):
test_results.append(("Server crashed", "FAIL"))
with open(
os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8"
) as summary_f:
for line in summary_f:
l = line.split("\t")
test_results.append((l[0], l[1]))
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
except Exception:
# status = "failure"
description = "Task failed: $?=" + str(retcode)
report_url = upload_results(
s3_helper,
pr_info.number,
pr_info.sha,
test_results,
paths,
check_name,
False,
)
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)
print(f"::notice:: {check_name} Report url: {report_url}")
ch_helper = ClickHouseHelper()
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
test_results,
status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
report_url,
check_name,
)
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
print(f"::notice Result: '{status}', '{description}', '{report_url}'")
post_commit_status(gh, pr_info.sha, check_name, description, status, report_url)


@ -14,6 +14,8 @@ from report import ReportColorTheme, create_test_html_report
def process_logs(
s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs
):
logging.info("Upload files to s3 %s", additional_logs)
processed_logs = {}
# Firstly convert paths of logs from test_results to urls to s3.
for test_result in test_results:


@ -447,6 +447,7 @@
"FORMAT"
"formatDateTime"
"formatReadableQuantity"
"formatReadableDecimalSize"
"formatReadableSize"
"formatReadableTimeDelta"
"formatRow"


@ -399,6 +399,7 @@
"demangle"
"toNullable"
"concat"
"formatReadableDecimalSize"
"formatReadableSize"
"shardCount"
"fromModifiedJulianDayOrNull"


@ -1,75 +0,0 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node")
@pytest.fixture(scope="module", autouse=True)
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def check_logs(must_contain, must_not_contain):
node.query("SYSTEM FLUSH LOGS")
for str in must_contain:
assert node.contains_in_log(str)
assert (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
).strip()
)
>= 1
)
for str in must_not_contain:
assert not node.contains_in_log(str)
assert (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{str}%'"
).strip()
)
== 0
)
# Passwords in CREATE/ALTER queries must be hidden in logs.
def test_create_alter_user():
node.query("CREATE USER u1 IDENTIFIED BY 'qwe123' SETTINGS custom_a = 'a'")
node.query("ALTER USER u1 IDENTIFIED BY '123qwe' SETTINGS custom_b = 'b'")
node.query(
"CREATE USER u2 IDENTIFIED WITH plaintext_password BY 'plainpasswd' SETTINGS custom_c = 'c'"
)
assert (
node.query("SHOW CREATE USER u1")
== "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
)
assert (
node.query("SHOW CREATE USER u2")
== "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
)
check_logs(
must_contain=[
"CREATE USER u1 IDENTIFIED WITH sha256_password",
"ALTER USER u1 IDENTIFIED WITH sha256_password",
"CREATE USER u2 IDENTIFIED WITH plaintext_password",
],
must_not_contain=[
"qwe123",
"123qwe",
"plainpasswd",
"IDENTIFIED WITH sha256_password BY",
"IDENTIFIED WITH sha256_hash BY",
"IDENTIFIED WITH plaintext_password BY",
],
)


@ -0,0 +1,340 @@
import pytest
import random, string
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node", with_zookeeper=True)
@pytest.fixture(scope="module", autouse=True)
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def check_logs(must_contain=[], must_not_contain=[]):
node.query("SYSTEM FLUSH LOGS")
for str in must_contain:
escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]")
assert node.contains_in_log(escaped_str)
for str in must_not_contain:
escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]")
assert not node.contains_in_log(escaped_str)
for str in must_contain:
escaped_str = str.replace("'", "\\'")
assert system_query_log_contains_search_pattern(escaped_str)
for str in must_not_contain:
escaped_str = str.replace("'", "\\'")
assert not system_query_log_contains_search_pattern(escaped_str)
# Returns true if "system.query_log" has a query matching a specified pattern.
def system_query_log_contains_search_pattern(search_pattern):
return (
int(
node.query(
f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'"
).strip()
)
>= 1
)
# Generates a random string.
def new_password(len=16):
return "".join(
random.choice(string.ascii_uppercase + string.digits) for _ in range(len)
)
# Passwords in CREATE/ALTER queries must be hidden in logs.
def test_create_alter_user():
password = new_password()
node.query(f"CREATE USER u1 IDENTIFIED BY '{password}' SETTINGS custom_a = 'a'")
node.query(
f"ALTER USER u1 IDENTIFIED BY '{password}{password}' SETTINGS custom_b = 'b'"
)
node.query(
f"CREATE USER u2 IDENTIFIED WITH plaintext_password BY '{password}' SETTINGS custom_c = 'c'"
)
assert (
node.query("SHOW CREATE USER u1")
== "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_b = \\'b\\'\n"
)
assert (
node.query("SHOW CREATE USER u2")
== "CREATE USER u2 IDENTIFIED WITH plaintext_password SETTINGS custom_c = \\'c\\'\n"
)
check_logs(
must_contain=[
"CREATE USER u1 IDENTIFIED WITH sha256_password",
"ALTER USER u1 IDENTIFIED WITH sha256_password",
"CREATE USER u2 IDENTIFIED WITH plaintext_password",
],
must_not_contain=[
password,
"IDENTIFIED WITH sha256_password BY",
"IDENTIFIED WITH sha256_hash BY",
"IDENTIFIED WITH plaintext_password BY",
],
)
node.query("DROP USER u1, u2")
def test_create_table():
password = new_password()
table_engines = [
f"MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
f"PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
f"MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}')",
f"S3('http://minio1:9001/root/data/test1.csv')",
f"S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')",
f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')",
]
for i, table_engine in enumerate(table_engines):
node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}")
check_logs(
must_contain=[
"CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
"CREATE TABLE table1 (`x` int) ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
"CREATE TABLE table2 (`x` int) ENGINE = MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]')",
"CREATE TABLE table3 (x int) ENGINE = S3('http://minio1:9001/root/data/test1.csv')",
"CREATE TABLE table4 (x int) ENGINE = S3('http://minio1:9001/root/data/test2.csv', 'CSV')",
"CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')",
"CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')",
],
must_not_contain=[password],
)
for i in range(0, len(table_engines)):
node.query(f"DROP TABLE table{i}")
def test_create_database():
password = new_password()
database_engines = [
f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1",
# f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')",
]
for i, database_engine in enumerate(database_engines):
# query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to MySQL server".
# We test logging here and not actual work with MySQL server.
node.query_and_get_answer_with_error(
f"CREATE DATABASE database{i} ENGINE = {database_engine}"
)
check_logs(
must_contain=[
"CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')",
# "CREATE DATABASE database1 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')",
],
must_not_contain=[password],
)
for i in range(0, len(database_engines)):
node.query(f"DROP DATABASE IF EXISTS database{i}")
def test_table_functions():
password = new_password()
table_functions = [
f"mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')",
f"postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')",
f"mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}', 'x int')",
f"s3('http://minio1:9001/root/data/test1.csv')",
f"s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
f"s3('http://minio1:9001/root/data/test3.csv', 'minio', '{password}')",
f"s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
f"s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
f"s3('http://minio1:9001/root/data/test6.csv', 'minio', '{password}', 'CSV')",
f"s3('http://minio1:9001/root/data/test7.csv', 'minio', '{password}', 'CSV', 'x int')",
f"s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '{password}', 'CSV', 'x int', 'gzip')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '{password}')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
f"s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '{password}', 'CSV')",
f"remote('127.{{2..11}}', default.remote_table)",
f"remote('127.{{2..11}}', default.remote_table, rand())",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user')",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}')",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', rand())",
f"remote('127.{{2..11}}', default.remote_table, 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', 'default.remote_table', 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}', rand())",
f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())",
f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')",
f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())",
]
for i, table_function in enumerate(table_functions):
node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}")
check_logs(
must_contain=[
"CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
"CREATE TABLE tablefunc1 (`x` int) AS postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')",
"CREATE TABLE tablefunc2 (`x` int) AS mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]', 'x int')",
"CREATE TABLE tablefunc3 (x int) AS s3('http://minio1:9001/root/data/test1.csv')",
"CREATE TABLE tablefunc4 (x int) AS s3('http://minio1:9001/root/data/test2.csv', 'CSV')",
"CREATE TABLE tablefunc5 (`x` int) AS s3('http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc6 (x int) AS s3('http://minio1:9001/root/data/test4.csv', 'CSV', 'x int')",
"CREATE TABLE tablefunc7 (x int) AS s3('http://minio1:9001/root/data/test5.csv.gz', 'CSV', 'x int', 'gzip')",
"CREATE TABLE tablefunc8 (`x` int) AS s3('http://minio1:9001/root/data/test6.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE tablefunc9 (`x` int) AS s3('http://minio1:9001/root/data/test7.csv', 'minio', '[HIDDEN]', 'CSV', 'x int')",
"CREATE TABLE tablefunc10 (`x` int) AS s3('http://minio1:9001/root/data/test8.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'x int', 'gzip')",
"CREATE TABLE tablefunc11 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test1.csv', 'minio', '[HIDDEN]')",
"CREATE TABLE tablefunc12 (x int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test2.csv', 'CSV', 'x int')",
"CREATE TABLE tablefunc13 (`x` int) AS s3Cluster('test_shard_localhost', 'http://minio1:9001/root/data/test3.csv', 'minio', '[HIDDEN]', 'CSV')",
"CREATE TABLE tablefunc14 (x int) AS remote('127.{2..11}', default.remote_table)",
"CREATE TABLE tablefunc15 (x int) AS remote('127.{2..11}', default.remote_table, rand())",
"CREATE TABLE tablefunc16 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user')",
"CREATE TABLE tablefunc17 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]')",
"CREATE TABLE tablefunc18 (x int) AS remote('127.{2..11}', default.remote_table, 'remote_user', rand())",
"CREATE TABLE tablefunc19 (`x` int) AS remote('127.{2..11}', default.remote_table, 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc20 (`x` int) AS remote('127.{2..11}', 'default.remote_table', 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc21 (`x` int) AS remote('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())",
"CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')",
"CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())",
],
must_not_contain=[password],
)
for i in range(0, len(table_functions)):
node.query(f"DROP TABLE tablefunc{i}")
def test_encryption_functions():
plaintext = new_password()
cipher = new_password()
key = new_password(32)
iv8 = new_password(8)
iv16 = new_password(16)
add = new_password()
encryption_functions = [
f"encrypt('aes-256-ofb', '{plaintext}', '{key}')",
f"encrypt('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}')",
f"encrypt('aes-256-gcm', '{plaintext}', '{key}', '{iv8}', '{add}')",
f"decrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
f"aes_encrypt_mysql('aes-256-ofb', '{plaintext}', '{key}', '{iv16}')",
f"aes_decrypt_mysql('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
f"tryDecrypt('aes-256-ofb', '{cipher}', '{key}', '{iv16}')",
]
for encryption_function in encryption_functions:
node.query(f"SELECT {encryption_function}")
check_logs(
must_contain=[
"SELECT encrypt('aes-256-ofb', '[HIDDEN]')",
"SELECT encrypt('aes-256-gcm', '[HIDDEN]')",
"SELECT decrypt('aes-256-ofb', '[HIDDEN]')",
"SELECT aes_encrypt_mysql('aes-256-ofb', '[HIDDEN]')",
"SELECT aes_decrypt_mysql('aes-256-ofb', '[HIDDEN]')",
"SELECT tryDecrypt('aes-256-ofb', '[HIDDEN]')",
],
must_not_contain=[plaintext, cipher, key, iv8, iv16, add],
)
def test_create_dictionary():
password = new_password()
node.query(
f"CREATE DICTIONARY dict1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n "
f"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '{password}' DB 'default')) "
f"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())"
)
check_logs(
must_contain=[
"CREATE DICTIONARY dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n "
"SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'user1' TABLE 'test' PASSWORD '[HIDDEN]' DB 'default')) "
"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())"
],
must_not_contain=[password],
)
node.query("DROP DICTIONARY dict1")
def test_backup_to_s3():
node.query("CREATE TABLE temptbl (x int) ENGINE=Log")
password = new_password()
queries = [
f"BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')",
f"RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '{password}')",
]
for query in queries:
# query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to AWS".
# We test logging here and not actual work with AWS server.
node.query_and_get_answer_with_error(query)
check_logs(
must_contain=[
"BACKUP TABLE temptbl TO S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')",
"RESTORE TABLE temptbl AS temptbl2 FROM S3('http://minio1:9001/root/data/backups/backup1', 'minio', '[HIDDEN]')",
],
must_not_contain=[password],
)
node.query("DROP TABLE IF EXISTS temptbl")
node.query("DROP TABLE IF EXISTS temptbl2")
def test_on_cluster():
password = new_password()
node.query(
f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')"
)
check_logs(
must_contain=[
"CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')",
],
must_not_contain=[password],
)
# Check the logs of DDLWorker while this query was being executed.
assert node.contains_in_log(
"DDLWorker: Processing task .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"DDLWorker: Executing query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"executeQuery: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert node.contains_in_log(
"DDLWorker: Executed query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')"
)
assert system_query_log_contains_search_pattern(
"%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')"
)
node.query(f"DROP TABLE table_oncl")


@ -1,8 +0,0 @@
<clickhouse>
<zookeeper>
<node index="1">
<host>zoo1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>


@ -1,88 +0,0 @@
import pytest
import time
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=["configs/zookeeper_config.xml"],
with_zookeeper=True,
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_part_number(table_name):
return TSV(
node.query(
f"SELECT count(*) FROM system.parts where table='{table_name}' and active=1"
)
)
def check_expected_part_number(seconds, table_name, expected):
ok = False
for i in range(int(seconds) * 2):
result = get_part_number(table_name)
if result == expected:
ok = True
break
else:
time.sleep(1)
assert ok
def test_without_force_merge_old_parts(start_cluster):
node.query(
"CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i;"
)
node.query("INSERT INTO test_without_merge SELECT 1")
node.query("INSERT INTO test_without_merge SELECT 2")
node.query("INSERT INTO test_without_merge SELECT 3")
expected = TSV("""3\n""")
# verify that the parts don't get merged
for i in range(10):
if get_part_number("test_without_merge") != expected:
assert False
time.sleep(1)
node.query("DROP TABLE test_without_merge;")
def test_force_merge_old_parts(start_cluster):
node.query(
"CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;"
)
node.query("INSERT INTO test_with_merge SELECT 1")
node.query("INSERT INTO test_with_merge SELECT 2")
node.query("INSERT INTO test_with_merge SELECT 3")
expected = TSV("""1\n""")
check_expected_part_number(10, "test_with_merge", expected)
node.query("DROP TABLE test_with_merge;")
def test_force_merge_old_parts_replicated_merge_tree(start_cluster):
node.query(
"CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;"
)
node.query("INSERT INTO test_replicated SELECT 1")
node.query("INSERT INTO test_replicated SELECT 2")
node.query("INSERT INTO test_replicated SELECT 3")
expected = TSV("""1\n""")
check_expected_part_number(10, "test_replicated", expected)
node.query("DROP TABLE test_replicated;")


@ -693,6 +693,19 @@ def test_auto_close_connection(started_cluster):
assert count == 2
def test_datetime(started_cluster):
cursor = started_cluster.postgres_conn.cursor()
cursor.execute("drop table if exists test")
cursor.execute("create table test (u timestamp)")
node1.query("drop database if exists pg")
node1.query("create database pg engine = PostgreSQL(postgres1)")
assert "DateTime64(6)" in node1.query("show create table pg.test")
node1.query("detach table pg.test")
node1.query("attach table pg.test")
assert "DateTime64(6)" in node1.query("show create table pg.test")
if __name__ == "__main__":
cluster.start()
input("Cluster created, press any key to destroy...")


@ -0,0 +1,70 @@
1.00 B 1.00 B 1.00 B
2.72 B 2.00 B 2.00 B
7.39 B 7.00 B 7.00 B
20.09 B 20.00 B 20.00 B
54.60 B 54.00 B 54.00 B
148.41 B 148.00 B 148.00 B
403.43 B 403.00 B 403.00 B
1.10 KB 1.10 KB 1.10 KB
2.98 KB 2.98 KB 2.98 KB
8.10 KB 8.10 KB 8.10 KB
22.03 KB 22.03 KB 22.03 KB
59.87 KB 59.87 KB 59.87 KB
162.75 KB 162.75 KB 162.75 KB
442.41 KB 442.41 KB 442.41 KB
1.20 MB 1.20 MB 1.20 MB
3.27 MB 3.27 MB 3.27 MB
8.89 MB 8.89 MB 8.89 MB
24.15 MB 24.15 MB 24.15 MB
65.66 MB 65.66 MB 65.66 MB
178.48 MB 178.48 MB 178.48 MB
485.17 MB 485.17 MB 485.17 MB
1.32 GB 1.32 GB 1.32 GB
3.58 GB 3.58 GB 2.15 GB
9.74 GB 9.74 GB 2.15 GB
26.49 GB 26.49 GB 2.15 GB
72.00 GB 72.00 GB 2.15 GB
195.73 GB 195.73 GB 2.15 GB
532.05 GB 532.05 GB 2.15 GB
1.45 TB 1.45 TB 2.15 GB
3.93 TB 3.93 TB 2.15 GB
10.69 TB 10.69 TB 2.15 GB
29.05 TB 29.05 TB 2.15 GB
78.96 TB 78.96 TB 2.15 GB
214.64 TB 214.64 TB 2.15 GB
583.46 TB 583.46 TB 2.15 GB
1.59 PB 1.59 PB 2.15 GB
4.31 PB 4.31 PB 2.15 GB
11.72 PB 11.72 PB 2.15 GB
31.86 PB 31.86 PB 2.15 GB
86.59 PB 86.59 PB 2.15 GB
235.39 PB 235.39 PB 2.15 GB
639.84 PB 639.84 PB 2.15 GB
1.74 EB 1.74 EB 2.15 GB
4.73 EB 4.73 EB 2.15 GB
12.85 EB 12.85 EB 2.15 GB
34.93 EB 18.45 EB 2.15 GB
94.96 EB 18.45 EB 2.15 GB
258.13 EB 18.45 EB 2.15 GB
701.67 EB 18.45 EB 2.15 GB
1.91 ZB 18.45 EB 2.15 GB
5.18 ZB 18.45 EB 2.15 GB
14.09 ZB 18.45 EB 2.15 GB
38.31 ZB 18.45 EB 2.15 GB
104.14 ZB 18.45 EB 2.15 GB
283.08 ZB 18.45 EB 2.15 GB
769.48 ZB 18.45 EB 2.15 GB
2.09 YB 18.45 EB 2.15 GB
5.69 YB 18.45 EB 2.15 GB
15.46 YB 18.45 EB 2.15 GB
42.01 YB 18.45 EB 2.15 GB
114.20 YB 18.45 EB 2.15 GB
310.43 YB 18.45 EB 2.15 GB
843.84 YB 18.45 EB 2.15 GB
2293.78 YB 18.45 EB 2.15 GB
6235.15 YB 18.45 EB 2.15 GB
16948.89 YB 18.45 EB 2.15 GB
46071.87 YB 18.45 EB 2.15 GB
125236.32 YB 18.45 EB 2.15 GB
340427.60 YB 18.45 EB 2.15 GB
925378.17 YB 18.45 EB 2.15 GB


@ -0,0 +1,4 @@
WITH round(exp(number), 6) AS x, x > 0xFFFFFFFFFFFFFFFF ? 0xFFFFFFFFFFFFFFFF : toUInt64(x) AS y, x > 0x7FFFFFFF ? 0x7FFFFFFF : toInt32(x) AS z
SELECT formatReadableDecimalSize(x), formatReadableDecimalSize(y), formatReadableDecimalSize(z)
FROM system.numbers
LIMIT 70;
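For intuition, a small Python sketch of the decimal (1000-based) formatting that the reference output above reflects; an illustration only, not ClickHouse's implementation. (The second and third columns plateau at 18.45 EB and 2.15 GB because the query clamps y and z to the UInt64 and Int32 ranges, not because of the function itself.)

    def format_readable_decimal_size(value: float) -> str:
        units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
        for unit in units:
            if abs(value) < 1000 or unit == units[-1]:
                return f"{value:.2f} {unit}"
            value /= 1000

    print(format_readable_decimal_size(2.718282))  # 2.72 B
    print(format_readable_decimal_size(1096.633))  # 1.10 KB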


@ -1,3 +1,8 @@
SELECT (if(a.test == 'a', b.test, c.test)) as `a.test` FROM
(SELECT 1 AS id, 'a' AS test) a
LEFT JOIN (SELECT 1 AS id, 'b' AS test) b ON b.id = a.id
LEFT JOIN (SELECT 1 AS id, 'c' AS test) c ON c.id = a.id;
SELECT COLUMNS('test') FROM
(SELECT 1 AS id, 'a' AS test) a
LEFT JOIN (SELECT 1 AS id, 'b' AS test) b ON b.id = a.id


@ -0,0 +1,32 @@
#!/usr/bin/expect -f
# Tags: long
# This is a regression test for concurrent access in ProgressIndication,
# so it is important to read enough rows here (10e6).
#
# Initially it was 100e6, but under the thread fuzzer 10 minutes may sometimes not be enough;
# I believe that CI will catch possible issues even with fewer rows anyway.
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
log_user 0
set timeout 60
match_max 100000
set stty_init "rows 25 cols 120"
expect_after {
eof { exp_continue }
timeout { exit 1 }
}
spawn bash
send "source $basedir/../shell_config.sh\r"
send "yes | head -n10000000 | \$CLICKHOUSE_CLIENT --query \"insert into function null('foo String') format TSV\" >/dev/null\r"
expect "Progress: "
send "\3"
send "exit\r"
expect eof


@ -1,2 +0,0 @@
0
--progress produce some rows


@ -1,19 +0,0 @@
#!/usr/bin/env bash
# Tags: long
# This is the regression for the concurrent access in ProgressIndication,
# so it is important to read enough rows here (10e6).
#
# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes,
# but I believe that CI will catch possible issues even with less rows anyway.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")"
trap 'rm $tmp_file_progress' EXIT
yes | head -n10000000 | $CLICKHOUSE_CLIENT -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress"
echo $?
test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress"


@ -0,0 +1,32 @@
#!/usr/bin/expect -f
# Tags: long
# This is a regression test for concurrent access in ProgressIndication,
# so it is important to read enough rows here (10e6).
#
# Initially it was 100e6, but under the thread fuzzer 10 minutes may sometimes not be enough;
# I believe that CI will catch possible issues even with fewer rows anyway.
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
log_user 0
set timeout 60
match_max 100000
set stty_init "rows 25 cols 120"
expect_after {
eof { exp_continue }
timeout { exit 1 }
}
spawn bash
send "source $basedir/../shell_config.sh\r"
send "yes | head -n10000000 | \$CLICKHOUSE_LOCAL --query \"insert into function null('foo String') format TSV\" >/dev/null\r"
expect "Progress: "
send "\3"
send "exit\r"
expect eof


@ -1,2 +0,0 @@
0
--progress produce some rows


@ -1,19 +0,0 @@
#!/usr/bin/env bash
# Tags: long
# This is the regression for the concurrent access in ProgressIndication,
# so it is important to read enough rows here (10e6).
#
# Initially there was 100e6, but under thread fuzzer 10min may be not enough sometimes,
# but I believe that CI will catch possible issues even with less rows anyway.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
tmp_file_progress="$(mktemp "$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.XXXXXX.progress")"
trap 'rm $tmp_file_progress' EXIT
yes | head -n10000000 | $CLICKHOUSE_LOCAL -q "insert into function null('foo String') format TSV" --progress 2> "$tmp_file_progress"
echo $?
test -s "$tmp_file_progress" && echo "--progress produce some rows" || echo "FAIL: no rows with --progress"


@ -0,0 +1,55 @@
#!/usr/bin/expect -f
set basedir [file dirname $argv0]
set basename [file tail $argv0]
exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0
log_user 0
set timeout 60
match_max 100000
set stty_init "rows 25 cols 120"
expect_after {
eof { exp_continue }
timeout { exit 1 }
}
spawn bash
send "source $basedir/../shell_config.sh\r"
# Progress is displayed by default
send "\$CLICKHOUSE_LOCAL --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null\r"
expect "Progress: "
expect "█"
send "\3"
# It is true even if we redirect both stdout and stderr to /dev/null
send "\$CLICKHOUSE_LOCAL --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
expect "Progress: "
expect "█"
send "\3"
# The --progress option has an implicit value of true
send "\$CLICKHOUSE_LOCAL --progress --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
expect "Progress: "
expect "█"
send "\3"
# But we can set it to false
send "\$CLICKHOUSE_LOCAL --progress false --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r"
expect -exact "0\tHello\r\n"
send "\3"
# As well as to 0 for the same effect
send "\$CLICKHOUSE_LOCAL --progress 0 --query 'SELECT sleep(1), \$\$Hello\$\$ FROM numbers(3) SETTINGS max_block_size = 1' 2>/dev/null\r"
expect -exact "0\tHello\r\n"
send "\3"
# If we set it to 1, the progress will be displayed as well
send "\$CLICKHOUSE_LOCAL --progress 1 --query 'SELECT sum(sleep(1) = 0) FROM numbers(3) SETTINGS max_block_size = 1' >/dev/null 2>&1\r"
expect "Progress: "
expect "█"
send "\3"
send "exit\r"
expect eof

Some files were not shown because too many files have changed in this diff.