mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into new-http-compression-methods
This commit is contained in:
commit
aa9e8f1cde
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1,3 +1,4 @@
|
||||
contrib/* linguist-vendored
|
||||
*.h linguist-language=C++
|
||||
tests/queries/0_stateless/data_json/* binary
|
||||
tests/queries/0_stateless/*.reference -crlf
|
||||
|
@ -495,6 +495,14 @@ endif ()
|
||||
|
||||
enable_testing() # Enable for tests without binary
|
||||
|
||||
option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended for any occasions. If it is enabled, it allows building with alternative OpenSSL library. By default, ClickHouse is using BoringSSL, which is better. Do not use this option." OFF)
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
message (STATUS "Build and uses OpenSSL library instead of BoringSSL. This is strongly discouraged. Your build of ClickHouse will be unsupported.")
|
||||
set(ENABLE_SSL 1)
|
||||
target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations")
|
||||
endif ()
|
||||
|
||||
# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
|
||||
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
|
||||
set (CLICKHOUSE_ETC_DIR "/etc")
|
||||
|
@ -5,6 +5,7 @@ ClickHouse® is an open-source column-oriented database management system that a
|
||||
## Useful Links
|
||||
|
||||
* [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page.
|
||||
* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers.
|
||||
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
|
||||
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
|
||||
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
|
||||
|
2
contrib/AMQP-CPP
vendored
2
contrib/AMQP-CPP
vendored
@ -1 +1 @@
|
||||
Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a
|
||||
Subproject commit 818c2d8ad96a08a5d20fece7d1e1e8855a2b0860
|
6
contrib/CMakeLists.txt
vendored
6
contrib/CMakeLists.txt
vendored
@ -74,7 +74,11 @@ add_contrib (re2-cmake re2)
|
||||
add_contrib (xz-cmake xz)
|
||||
add_contrib (brotli-cmake brotli)
|
||||
add_contrib (double-conversion-cmake double-conversion)
|
||||
add_contrib (boringssl-cmake boringssl)
|
||||
if (NOT ENABLE_EXTERNAL_OPENSSL)
|
||||
add_contrib (boringssl-cmake boringssl)
|
||||
else ()
|
||||
add_contrib (openssl-cmake openssl)
|
||||
endif ()
|
||||
add_contrib (poco-cmake poco)
|
||||
add_contrib (croaring-cmake croaring)
|
||||
add_contrib (zstd-cmake zstd)
|
||||
|
@ -4,6 +4,11 @@ if (NOT ENABLE_AMQPCPP)
|
||||
message(STATUS "Not using AMQP-CPP")
|
||||
return()
|
||||
endif()
|
||||
if (OS_FREEBSD)
|
||||
message(STATUS "Not using AMQP-CPP because libuv is disabled")
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
||||
# can be removed once libuv build on MacOS with GCC is possible
|
||||
if (NOT TARGET ch_contrib::uv)
|
||||
|
@ -578,6 +578,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
|
||||
list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c")
|
||||
endif()
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c")
|
||||
list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c")
|
||||
endif ()
|
||||
|
||||
|
||||
target_sources(_krb5 PRIVATE
|
||||
${ALL_SRCS}
|
||||
)
|
||||
|
@ -59,6 +59,12 @@ set(SRCS
|
||||
|
||||
add_library(_libpq ${SRCS})
|
||||
|
||||
if (ENABLE_EXTERNAL_OPENSSL)
|
||||
add_definitions(-DHAVE_BIO_METH_NEW)
|
||||
add_definitions(-DHAVE_HMAC_CTX_NEW)
|
||||
add_definitions(-DHAVE_HMAC_CTX_FREE)
|
||||
endif ()
|
||||
|
||||
target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
|
||||
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
|
||||
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
|
||||
|
2
contrib/llvm-project
vendored
2
contrib/llvm-project
vendored
@ -1 +1 @@
|
||||
Subproject commit dc972a767ff2e9488d96cb2a6e67de160fbe15a7
|
||||
Subproject commit 328e4602120ddd6b2c1fb91bf2d50bd7bc249711
|
6
docker/test/fuzzer/allow-nullable-key.xml
Normal file
6
docker/test/fuzzer/allow-nullable-key.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<clickhouse>
|
||||
<!-- Allow nullable key to avoid errors while fuzzing definitions of tables -->
|
||||
<merge_tree>
|
||||
<allow_nullable_key>1</allow_nullable_key>
|
||||
</merge_tree>
|
||||
</clickhouse>
|
@ -94,6 +94,7 @@ function configure
|
||||
# TODO figure out which ones are needed
|
||||
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
|
||||
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
|
||||
cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d
|
||||
|
||||
cat > db/config.d/core.xml <<EOL
|
||||
<clickhouse>
|
||||
@ -240,6 +241,7 @@ quit
|
||||
--receive_data_timeout_ms=10000 \
|
||||
--stacktrace \
|
||||
--query-fuzzer-runs=1000 \
|
||||
--create-query-fuzzer-runs=50 \
|
||||
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
|
||||
$NEW_TESTS_OPT \
|
||||
> >(tail -n 100000 > fuzzer.log) \
|
||||
|
@ -1,8 +1,7 @@
|
||||
position: 10
|
||||
position: 1
|
||||
label: 'Example Datasets'
|
||||
collapsible: true
|
||||
collapsed: true
|
||||
link:
|
||||
type: generated-index
|
||||
title: Example Datasets
|
||||
slug: /en/getting-started/example-datasets
|
||||
type: doc
|
||||
id: en/getting-started/example-datasets/
|
||||
|
@ -1,9 +1,16 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/cell-towers
|
||||
sidebar_label: Cell Towers
|
||||
sidebar_position: 3
|
||||
title: "Cell Towers"
|
||||
---
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
import CodeBlock from '@theme/CodeBlock';
|
||||
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
|
||||
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
|
||||
|
||||
This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
|
||||
|
||||
As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).
|
||||
@ -13,6 +20,26 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4
|
||||
|
||||
## Get the Dataset {#get-the-dataset}
|
||||
|
||||
<Tabs groupId="deployMethod">
|
||||
<TabItem value="serverless" label="ClickHouse Cloud" default>
|
||||
|
||||
ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
|
||||
<ActionsMenu menu="Load Data" />
|
||||
|
||||
Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data**:
|
||||
|
||||
![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
|
||||
|
||||
Examine the schema of the cell_towers table:
|
||||
```sql
|
||||
DESCRIBE TABLE cell_towers
|
||||
```
|
||||
|
||||
<SQLConsoleDetail />
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="selfmanaged" label="Self-managed">
|
||||
|
||||
1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).
|
||||
|
||||
2. Validate the integrity (optional step):
|
||||
@ -56,7 +83,10 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
|
||||
clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv
|
||||
```
|
||||
|
||||
## Examples {#examples}
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Example queries {#examples}
|
||||
|
||||
1. A number of cell towers by type:
|
||||
|
||||
@ -101,18 +131,31 @@ So, the top countries are: the USA, Germany, and Russia.
|
||||
|
||||
You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
|
||||
|
||||
|
||||
## Use case {#use-case}
|
||||
## Use case: Incorporate geo data {#use-case}
|
||||
|
||||
Using `pointInPolygon` function.
|
||||
|
||||
1. Create a table where we will store polygons:
|
||||
|
||||
<Tabs groupId="deployMethod">
|
||||
<TabItem value="serverless" label="ClickHouse Cloud" default>
|
||||
|
||||
```sql
|
||||
CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64)))
|
||||
ORDER BY polygon;
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="selfmanaged" label="Self-managed">
|
||||
|
||||
```sql
|
||||
CREATE TEMPORARY TABLE
|
||||
moscow (polygon Array(Tuple(Float64, Float64)));
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
2. This is a rough shape of Moscow (without "new Moscow"):
|
||||
|
||||
```sql
|
||||
|
File diff suppressed because one or more lines are too long
@ -13,16 +13,6 @@ Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
|
||||
|
||||
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
|
||||
|
||||
## Download the Dataset {#download-dataset}
|
||||
|
||||
Run the command:
|
||||
|
||||
```bash
|
||||
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
|
||||
```
|
||||
|
||||
Download will take about 2 minutes with good internet connection.
|
||||
|
||||
## Create the Table {#create-table}
|
||||
|
||||
```sql
|
||||
@ -41,31 +31,49 @@ CREATE TABLE uk_price_paid
|
||||
locality LowCardinality(String),
|
||||
town LowCardinality(String),
|
||||
district LowCardinality(String),
|
||||
county LowCardinality(String),
|
||||
category UInt8
|
||||
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
county LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (postcode1, postcode2, addr1, addr2);
|
||||
```
|
||||
|
||||
## Preprocess and Import Data {#preprocess-import-data}
|
||||
## Preprocess and Insert the Data {#preprocess-import-data}
|
||||
|
||||
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
|
||||
We will use the `url` function to stream the data into ClickHouse. We need to preprocess some of the incoming data first, which includes:
|
||||
- splitting the `postcode` to two different columns - `postcode1` and `postcode2`, which is better for storage and queries
|
||||
- converting the `time` field to date as it only contains 00:00 time
|
||||
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis
|
||||
- transforming `type` and `duration` to more readable `Enum` fields using the [transform](../../sql-reference/functions/other-functions.md#transform) function
|
||||
- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 or 1
|
||||
- drop the last two columns since they all have the same value (which is 0)
|
||||
|
||||
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
|
||||
The `url` function streams the data from the web server into your ClickHouse table. The following command inserts 5 million rows into the `uk_price_paid` table:
|
||||
|
||||
The preprocessing is:
|
||||
- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries;
|
||||
- coverting the `time` field to date as it only contains 00:00 time;
|
||||
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
|
||||
- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform);
|
||||
- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1.
|
||||
|
||||
Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
|
||||
|
||||
```bash
|
||||
clickhouse-local --input-format CSV --structure '
|
||||
uuid String,
|
||||
price UInt32,
|
||||
time DateTime,
|
||||
```sql
|
||||
INSERT INTO uk_price_paid
|
||||
WITH
|
||||
splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
toUInt32(price_string) AS price,
|
||||
parseDateTimeBestEffortUS(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county
|
||||
FROM url(
|
||||
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
|
||||
'CSV',
|
||||
'uuid_string String,
|
||||
price_string String,
|
||||
time String,
|
||||
postcode String,
|
||||
a String,
|
||||
b String,
|
||||
@ -78,154 +86,136 @@ clickhouse-local --input-format CSV --structure '
|
||||
district String,
|
||||
county String,
|
||||
d String,
|
||||
e String
|
||||
' --query "
|
||||
WITH splitByChar(' ', postcode) AS p
|
||||
SELECT
|
||||
price,
|
||||
toDate(time) AS date,
|
||||
p[1] AS postcode1,
|
||||
p[2] AS postcode2,
|
||||
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
|
||||
b = 'Y' AS is_new,
|
||||
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
|
||||
addr1,
|
||||
addr2,
|
||||
street,
|
||||
locality,
|
||||
town,
|
||||
district,
|
||||
county,
|
||||
d = 'B' AS category
|
||||
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
|
||||
e String'
|
||||
) SETTINGS max_http_get_redirects=10;
|
||||
```
|
||||
|
||||
It will take about 40 seconds.
|
||||
Wait for the data to insert - it will take a minute or two depending on the network speed.
|
||||
|
||||
## Validate the Data {#validate-data}
|
||||
|
||||
Query:
|
||||
Let's verify it worked by seeing how many rows were inserted:
|
||||
|
||||
```sql
|
||||
SELECT count() FROM uk_price_paid;
|
||||
SELECT count()
|
||||
FROM uk_price_paid
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌──count()─┐
|
||||
│ 26321785 │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
The size of dataset in ClickHouse is just 278 MiB, check it.
|
||||
|
||||
Query:
|
||||
At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse:
|
||||
|
||||
```sql
|
||||
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
|
||||
SELECT formatReadableSize(total_bytes)
|
||||
FROM system.tables
|
||||
WHERE name = 'uk_price_paid'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─formatReadableSize(total_bytes)─┐
|
||||
│ 278.80 MiB │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
Notice the size of the table is just 221.43 MiB!
|
||||
|
||||
## Run Some Queries {#run-queries}
|
||||
|
||||
Let's run some queries to analyze the data:
|
||||
|
||||
### Query 1. Average Price Per Year {#average-price}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 1000000, 80
|
||||
)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85436 │ ██████▋ │
|
||||
│ 1999 │ 96037 │ ███████▋ │
|
||||
│ 2000 │ 107479 │ ████████▌ │
|
||||
│ 2001 │ 118885 │ █████████▌ │
|
||||
│ 2002 │ 137941 │ ███████████ │
|
||||
│ 2003 │ 155889 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189351 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219378 │ █████████████████▌ │
|
||||
│ 1995 │ 67934 │ █████▍ │
|
||||
│ 1996 │ 71508 │ █████▋ │
|
||||
│ 1997 │ 78536 │ ██████▎ │
|
||||
│ 1998 │ 85441 │ ██████▋ │
|
||||
│ 1999 │ 96038 │ ███████▋ │
|
||||
│ 2000 │ 107487 │ ████████▌ │
|
||||
│ 2001 │ 118888 │ █████████▌ │
|
||||
│ 2002 │ 137948 │ ███████████ │
|
||||
│ 2003 │ 155893 │ ████████████▍ │
|
||||
│ 2004 │ 178888 │ ██████████████▎ │
|
||||
│ 2005 │ 189359 │ ███████████████▏ │
|
||||
│ 2006 │ 203532 │ ████████████████▎ │
|
||||
│ 2007 │ 219375 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236109 │ ██████████████████▊ │
|
||||
│ 2010 │ 236110 │ ██████████████████▊ │
|
||||
│ 2011 │ 232805 │ ██████████████████▌ │
|
||||
│ 2012 │ 238367 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279915 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297266 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313201 │ █████████████████████████ │
|
||||
│ 2017 │ 346097 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350116 │ ████████████████████████████ │
|
||||
│ 2019 │ 351013 │ ████████████████████████████ │
|
||||
│ 2020 │ 369420 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 386903 │ ██████████████████████████████▊ │
|
||||
│ 2012 │ 238381 │ ███████████████████ │
|
||||
│ 2013 │ 256927 │ ████████████████████▌ │
|
||||
│ 2014 │ 280008 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297263 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313518 │ █████████████████████████ │
|
||||
│ 2017 │ 346371 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350556 │ ████████████████████████████ │
|
||||
│ 2019 │ 352184 │ ████████████████████████████▏ │
|
||||
│ 2020 │ 375808 │ ██████████████████████████████ │
|
||||
│ 2021 │ 381105 │ ██████████████████████████████▍ │
|
||||
│ 2022 │ 362572 │ █████████████████████████████ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Query 2. Average Price per Year in London {#average-price-london}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
round(avg(price)) AS price,
|
||||
bar(price, 0, 2000000, 100
|
||||
)
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109116 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180637 │ █████████ │
|
||||
│ 2000 │ 215838 │ ██████████▋ │
|
||||
│ 2001 │ 232994 │ ███████████▋ │
|
||||
│ 2002 │ 263670 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304666 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356191 │ █████████████████▋ │
|
||||
│ 2007 │ 404054 │ ████████████████████▏ │
|
||||
│ 1995 │ 109110 │ █████▍ │
|
||||
│ 1996 │ 118659 │ █████▊ │
|
||||
│ 1997 │ 136526 │ ██████▋ │
|
||||
│ 1998 │ 153002 │ ███████▋ │
|
||||
│ 1999 │ 180633 │ █████████ │
|
||||
│ 2000 │ 215849 │ ██████████▋ │
|
||||
│ 2001 │ 232987 │ ███████████▋ │
|
||||
│ 2002 │ 263668 │ █████████████▏ │
|
||||
│ 2003 │ 278424 │ █████████████▊ │
|
||||
│ 2004 │ 304664 │ ███████████████▏ │
|
||||
│ 2005 │ 322887 │ ████████████████▏ │
|
||||
│ 2006 │ 356195 │ █████████████████▋ │
|
||||
│ 2007 │ 404062 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427753 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519442 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616212 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
|
||||
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
|
||||
│ 2009 │ 427754 │ █████████████████████▍ │
|
||||
│ 2010 │ 480322 │ ████████████████████████ │
|
||||
│ 2011 │ 496278 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519482 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616195 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724121 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
|
||||
│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
|
||||
│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
|
||||
│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
|
||||
Something happened to home prices in 2020! But that is probably not a surprise...
|
||||
|
||||
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
town,
|
||||
@ -240,124 +230,123 @@ GROUP BY
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100;
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
Result:
|
||||
The result looks like:
|
||||
|
||||
```text
|
||||
|
||||
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
|
||||
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
|
||||
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
|
||||
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
|
||||
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
|
||||
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
|
||||
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
|
||||
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
|
||||
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
|
||||
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
|
||||
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
|
||||
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
|
||||
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
|
||||
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
|
||||
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
|
||||
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
|
||||
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
|
||||
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
|
||||
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
|
||||
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
|
||||
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
|
||||
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
|
||||
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
|
||||
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
|
||||
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
|
||||
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
|
||||
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
|
||||
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
|
||||
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
|
||||
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
|
||||
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
|
||||
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
```response
|
||||
┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
|
||||
│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
|
||||
│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
|
||||
│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
|
||||
│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
|
||||
│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
|
||||
│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
|
||||
│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
|
||||
│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
|
||||
│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
|
||||
│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
|
||||
│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
|
||||
│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
|
||||
│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
|
||||
│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
|
||||
│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
|
||||
│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
|
||||
│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
|
||||
│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
|
||||
│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
|
||||
│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
|
||||
│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
|
||||
│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
|
||||
│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
|
||||
│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
|
||||
│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
|
||||
│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
|
||||
│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
|
||||
│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
|
||||
│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
|
||||
│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
|
||||
│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
|
||||
│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
|
||||
│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
|
||||
│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
|
||||
│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
|
||||
│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
|
||||
│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
|
||||
│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
|
||||
│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
|
||||
│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
|
||||
│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
|
||||
│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
|
||||
│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
|
||||
│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
|
||||
│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
|
||||
│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
|
||||
│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
|
||||
│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
|
||||
└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
|
||||
|
||||
[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data.
|
||||
[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance fo the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
|
||||
|
||||
### Build a Projection {#build-projection}
|
||||
|
||||
Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`:
|
||||
Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`:
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
@ -374,25 +363,23 @@ ALTER TABLE uk_price_paid
|
||||
toYear(date),
|
||||
district,
|
||||
town
|
||||
);
|
||||
)
|
||||
```
|
||||
|
||||
Populate the projection for existing data (without it projection will be created for only newly inserted data):
|
||||
Populate the projection for existing data. (Without materializing it, the projection will be created for only newly inserted data):
|
||||
|
||||
```sql
|
||||
ALTER TABLE uk_price_paid
|
||||
MATERIALIZE PROJECTION projection_by_year_district_town
|
||||
SETTINGS mutations_sync = 1;
|
||||
SETTINGS mutations_sync = 1
|
||||
```
|
||||
|
||||
## Test Performance {#test-performance}
|
||||
|
||||
Let's run the same 3 queries.
|
||||
Let's run the same 3 queries again:
|
||||
|
||||
### Query 1. Average Price Per Year {#average-price-projections}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
@ -400,47 +387,18 @@ SELECT
|
||||
bar(price, 0, 1000000, 80)
|
||||
FROM uk_price_paid
|
||||
GROUP BY year
|
||||
ORDER BY year ASC;
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
|
||||
│ 1995 │ 67932 │ █████▍ │
|
||||
│ 1996 │ 71505 │ █████▋ │
|
||||
│ 1997 │ 78532 │ ██████▎ │
|
||||
│ 1998 │ 85436 │ ██████▋ │
|
||||
│ 1999 │ 96037 │ ███████▋ │
|
||||
│ 2000 │ 107479 │ ████████▌ │
|
||||
│ 2001 │ 118885 │ █████████▌ │
|
||||
│ 2002 │ 137941 │ ███████████ │
|
||||
│ 2003 │ 155889 │ ████████████▍ │
|
||||
│ 2004 │ 178885 │ ██████████████▎ │
|
||||
│ 2005 │ 189351 │ ███████████████▏ │
|
||||
│ 2006 │ 203528 │ ████████████████▎ │
|
||||
│ 2007 │ 219378 │ █████████████████▌ │
|
||||
│ 2008 │ 217056 │ █████████████████▎ │
|
||||
│ 2009 │ 213419 │ █████████████████ │
|
||||
│ 2010 │ 236109 │ ██████████████████▊ │
|
||||
│ 2011 │ 232805 │ ██████████████████▌ │
|
||||
│ 2012 │ 238367 │ ███████████████████ │
|
||||
│ 2013 │ 256931 │ ████████████████████▌ │
|
||||
│ 2014 │ 279915 │ ██████████████████████▍ │
|
||||
│ 2015 │ 297266 │ ███████████████████████▋ │
|
||||
│ 2016 │ 313201 │ █████████████████████████ │
|
||||
│ 2017 │ 346097 │ ███████████████████████████▋ │
|
||||
│ 2018 │ 350116 │ ████████████████████████████ │
|
||||
│ 2019 │ 351013 │ ████████████████████████████ │
|
||||
│ 2020 │ 369420 │ █████████████████████████████▌ │
|
||||
│ 2021 │ 386903 │ ██████████████████████████████▊ │
|
||||
└──────┴────────┴────────────────────────────────────────┘
|
||||
The result is the same, but the performance is better!
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
|
||||
```
|
||||
|
||||
|
||||
### Query 2. Average Price Per Year in London {#average-price-london-projections}
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
toYear(date) AS year,
|
||||
@ -449,48 +407,19 @@ SELECT
|
||||
FROM uk_price_paid
|
||||
WHERE town = 'LONDON'
|
||||
GROUP BY year
|
||||
ORDER BY year ASC;
|
||||
ORDER BY year ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
Same result, but notice the improvement in query performance:
|
||||
|
||||
```text
|
||||
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
|
||||
│ 1995 │ 109116 │ █████▍ │
|
||||
│ 1996 │ 118667 │ █████▊ │
|
||||
│ 1997 │ 136518 │ ██████▋ │
|
||||
│ 1998 │ 152983 │ ███████▋ │
|
||||
│ 1999 │ 180637 │ █████████ │
|
||||
│ 2000 │ 215838 │ ██████████▋ │
|
||||
│ 2001 │ 232994 │ ███████████▋ │
|
||||
│ 2002 │ 263670 │ █████████████▏ │
|
||||
│ 2003 │ 278394 │ █████████████▊ │
|
||||
│ 2004 │ 304666 │ ███████████████▏ │
|
||||
│ 2005 │ 322875 │ ████████████████▏ │
|
||||
│ 2006 │ 356191 │ █████████████████▋ │
|
||||
│ 2007 │ 404054 │ ████████████████████▏ │
|
||||
│ 2008 │ 420741 │ █████████████████████ │
|
||||
│ 2009 │ 427753 │ █████████████████████▍ │
|
||||
│ 2010 │ 480306 │ ████████████████████████ │
|
||||
│ 2011 │ 496274 │ ████████████████████████▋ │
|
||||
│ 2012 │ 519442 │ █████████████████████████▊ │
|
||||
│ 2013 │ 616212 │ ██████████████████████████████▋ │
|
||||
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
|
||||
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
|
||||
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
|
||||
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
|
||||
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
|
||||
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
|
||||
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
|
||||
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
|
||||
└──────┴─────────┴───────────────────────────────────────────────────────┘
|
||||
```response
|
||||
No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
|
||||
With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
|
||||
```
|
||||
|
||||
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
|
||||
|
||||
The condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020).
|
||||
|
||||
Query:
|
||||
The condition (date >= '2020-01-01') needs to be modified so that it matches the projection dimension (`toYear(date) >= 2020)`:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
@ -506,138 +435,16 @@ GROUP BY
|
||||
district
|
||||
HAVING c >= 100
|
||||
ORDER BY price DESC
|
||||
LIMIT 100;
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
Result:
|
||||
Again, the result is the same but notice the improvement in query performance:
|
||||
|
||||
```text
|
||||
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
|
||||
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
|
||||
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
|
||||
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
|
||||
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
|
||||
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
|
||||
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
|
||||
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
|
||||
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
|
||||
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
|
||||
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
|
||||
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
|
||||
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
|
||||
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
|
||||
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
|
||||
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
|
||||
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
|
||||
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
|
||||
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
|
||||
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
|
||||
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
|
||||
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
|
||||
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
|
||||
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
|
||||
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
|
||||
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
|
||||
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
|
||||
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
|
||||
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
|
||||
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
|
||||
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
|
||||
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
|
||||
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
|
||||
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
|
||||
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
|
||||
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
|
||||
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
|
||||
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
|
||||
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
|
||||
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
|
||||
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
|
||||
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
|
||||
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
|
||||
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
|
||||
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
|
||||
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
|
||||
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
|
||||
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
|
||||
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
|
||||
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
|
||||
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
|
||||
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
|
||||
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
|
||||
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
|
||||
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
|
||||
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
|
||||
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
|
||||
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
|
||||
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
|
||||
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
|
||||
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
|
||||
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
|
||||
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
|
||||
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
|
||||
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
|
||||
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
|
||||
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
|
||||
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
|
||||
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
|
||||
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
|
||||
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
|
||||
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
|
||||
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
|
||||
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
|
||||
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
|
||||
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
|
||||
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
|
||||
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
|
||||
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
|
||||
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
|
||||
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
|
||||
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
|
||||
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
|
||||
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
|
||||
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
|
||||
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
|
||||
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
|
||||
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
|
||||
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
|
||||
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
|
||||
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
|
||||
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
|
||||
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
|
||||
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
|
||||
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
|
||||
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
|
||||
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
|
||||
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
|
||||
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
|
||||
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
|
||||
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
|
||||
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
|
||||
```response
|
||||
No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
|
||||
With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
|
||||
```
|
||||
|
||||
### Summary {#summary}
|
||||
|
||||
All 3 queries work much faster and read fewer rows.
|
||||
|
||||
```text
|
||||
Query 1
|
||||
|
||||
no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
|
||||
projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
|
||||
|
||||
|
||||
Query 2
|
||||
|
||||
no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
|
||||
projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
|
||||
|
||||
Query 3
|
||||
|
||||
no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
|
||||
projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
|
||||
```
|
||||
|
||||
### Test It in Playground {#playground}
|
||||
### Test it in the Playground {#playground}
|
||||
|
||||
The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
|
||||
|
26
docs/en/getting-started/index.md
Normal file
26
docs/en/getting-started/index.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/
|
||||
sidebar_position: 0
|
||||
sidebar_label: Overview
|
||||
keywords: [clickhouse, install, tutorial, sample, datasets]
|
||||
pagination_next: 'en/tutorial'
|
||||
---
|
||||
|
||||
# Tutorials and Example Datasets
|
||||
|
||||
We have a lot of resources for helping you get started and learn how ClickHouse works:
|
||||
|
||||
- If you need to get ClickHouse up and running, check out our [Quick Start](../quick-start.mdx)
|
||||
- The [ClickHouse Tutorial](../tutorial.md) analyzes a dataset of New York City taxi rides
|
||||
|
||||
In addition, the sample datasets provide a great experience on working with ClickHouse,
|
||||
learning important techniques and tricks, and seeing how to take advantage of the many powerful
|
||||
functions in ClickHouse. The sample datasets include:
|
||||
|
||||
- The [UK Property Price Paid dataset](../getting-started/example-datasets/uk-price-paid.md) is a good starting point with some interesting SQL queries
|
||||
- The [New York Taxi Data](../getting-started/example-datasets/nyc-taxi.md) has an example of how to insert data from S3 into ClickHouse
|
||||
- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
|
||||
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
|
||||
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
|
||||
|
||||
View the **Tutorials and Datasets** menu for a complete list of sample datasets.
|
@ -3,6 +3,7 @@ slug: /en/interfaces/cli
|
||||
sidebar_position: 17
|
||||
sidebar_label: Command-Line Client
|
||||
---
|
||||
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md';
|
||||
|
||||
# Command-line Client
|
||||
|
||||
@ -24,26 +25,76 @@ Connected to ClickHouse server version 20.13.1 revision 54442.
|
||||
Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server app. When you try to use a client of the older version, then the server, `clickhouse-client` displays the message:
|
||||
|
||||
```response
|
||||
ClickHouse client version is older than ClickHouse server. It may lack support for new features.
|
||||
ClickHouse client version is older than ClickHouse server.
|
||||
It may lack support for new features.
|
||||
```
|
||||
|
||||
## Usage {#cli_usage}
|
||||
|
||||
The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
|
||||
The client can be used in interactive and non-interactive (batch) mode.
|
||||
|
||||
Example of using the client to insert data:
|
||||
### Gather your connection details
|
||||
<ConnectionDetails />
|
||||
|
||||
### Interactive
|
||||
|
||||
To connect to your ClickHouse Cloud service, or any ClickHouse server using TLS and passwords, interactively use `--secure`, port 9440, and provide your username and password:
|
||||
|
||||
```bash
|
||||
clickhouse-client --host <HOSTNAME> \
|
||||
--secure \
|
||||
--port 9440 \
|
||||
--user <USERNAME> \
|
||||
--password <PASSWORD>
|
||||
```
|
||||
|
||||
To connect to a self-managed ClickHouse server you will need the details for that server. Whether or not TLS is used, port numbers, and passwords are all configurable. Use the above example for ClickHouse Cloud as a starting point.
|
||||
|
||||
|
||||
### Batch
|
||||
|
||||
To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
|
||||
|
||||
Examples of using the client to insert data:
|
||||
|
||||
#### Inserting a CSV file into a remote ClickHouse service
|
||||
|
||||
This example is appropriate for ClickHouse Cloud, or any ClickHouse server using TLS and a password. In this example a sample dataset CSV file, `cell_towers.csv` is inserted into an existing table `cell_towers` in the `default` database:
|
||||
|
||||
```bash
|
||||
clickhouse-client --host HOSTNAME.clickhouse.cloud \
|
||||
--secure \
|
||||
--port 9440 \
|
||||
--user default \
|
||||
--password PASSWORD \
|
||||
--query "INSERT INTO cell_towers FORMAT CSVWithNames" \
|
||||
< cell_towers.csv
|
||||
```
|
||||
|
||||
:::note
|
||||
To concentrate on the query syntax, the rest of the examples leave off the connection details (`--host`, `--port`, etc.). Add them in when you try the commands.
|
||||
:::
|
||||
|
||||
#### Three different ways of inserting data
|
||||
|
||||
``` bash
|
||||
$ echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | \
|
||||
clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
$ cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```bash
|
||||
cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
3, 'some text', '2016-08-14 00:00:00'
|
||||
4, 'some more text', '2016-08-14 00:00:01'
|
||||
_EOF
|
||||
|
||||
$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
```bash
|
||||
cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
|
||||
```
|
||||
|
||||
### Notes
|
||||
|
||||
In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query.
|
||||
|
||||
By default, you can only process a single query in batch mode. To make multiple queries from a “script,” use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run ‘clickhouse-client’ for each query. Note that it may take tens of milliseconds to launch the ‘clickhouse-client’ program.
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: ClickHouse Keeper
|
||||
---
|
||||
|
||||
# ClickHouse Keeper
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper.
|
||||
|
||||
|
@ -3,7 +3,11 @@ slug: /en/operations/external-authenticators/
|
||||
sidebar_position: 48
|
||||
sidebar_label: External User Authenticators and Directories
|
||||
title: "External User Authenticators and Directories"
|
||||
pagination_next: 'en/operations/external-authenticators/kerberos'
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse supports authenticating and managing users using external services.
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/kerberos
|
||||
---
|
||||
# Kerberos
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/ldap
|
||||
title: "LDAP"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
slug: /en/operations/external-authenticators/ssl-x509
|
||||
title: "SSL X.509 certificate authentication"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
|
||||
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: Monitoring
|
||||
---
|
||||
|
||||
# Monitoring
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You can monitor:
|
||||
|
||||
|
@ -3,9 +3,12 @@ slug: /en/operations/optimizing-performance/sampling-query-profiler
|
||||
sidebar_position: 54
|
||||
sidebar_label: Query Profiling
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
# Sampling Query Profiler
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time.
|
||||
|
||||
To use profiler:
|
||||
|
@ -5,6 +5,10 @@ sidebar_label: Testing Hardware
|
||||
title: "How to Test Your Hardware with ClickHouse"
|
||||
---
|
||||
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You can run a basic ClickHouse performance test on any server without installation of ClickHouse packages.
|
||||
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
slug: /en/operations/server-configuration-parameters/
|
||||
sidebar_position: 54
|
||||
sidebar_label: Server Configuration Parameters
|
||||
pagination_next: en/operations/server-configuration-parameters/settings
|
||||
---
|
||||
|
||||
# Server Configuration Parameters
|
||||
|
@ -666,6 +666,7 @@ Keys:
|
||||
- `http_proxy` - Configure HTTP proxy for sending crash reports.
|
||||
- `debug` - Sets the Sentry client into debug mode.
|
||||
- `tmp_path` - Filesystem path for temporary crash report state.
|
||||
- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse.
|
||||
|
||||
**Recommended way to use**
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
sidebar_label: Settings
|
||||
sidebar_position: 51
|
||||
slug: /en/operations/settings/
|
||||
pagination_next: en/operations/settings/settings
|
||||
---
|
||||
|
||||
# Settings Overview
|
||||
|
@ -668,7 +668,7 @@ log_query_views=1
|
||||
|
||||
## log_formatted_queries {#settings-log-formatted-queries}
|
||||
|
||||
Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table.
|
||||
Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)).
|
||||
|
||||
Possible values:
|
||||
|
||||
|
@ -5,6 +5,9 @@ sidebar_label: Secured Communication with Zookeeper
|
||||
---
|
||||
|
||||
# Optional secured communication between ClickHouse and Zookeeper
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with ClickHouse client over SSL. These options are available from Zookeeper version 3.5.2.
|
||||
|
||||
|
@ -4,6 +4,9 @@ sidebar_position: 58
|
||||
sidebar_label: Usage Recommendations
|
||||
title: "Usage Recommendations"
|
||||
---
|
||||
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
## CPU Scaling Governor
|
||||
|
||||
|
@ -294,6 +294,53 @@ Result:
|
||||
|
||||
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
|
||||
|
||||
## tryDecrypt
|
||||
|
||||
Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key.
|
||||
|
||||
**Examples**
|
||||
|
||||
Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, `iv` is an initial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field:
|
||||
|
||||
```sql
|
||||
CREATE TABLE decrypt_null (
|
||||
dt DateTime,
|
||||
user_id UInt32,
|
||||
encrypted String,
|
||||
iv String
|
||||
) ENGINE = Memory;
|
||||
```
|
||||
|
||||
Insert some data:
|
||||
|
||||
```sql
|
||||
INSERT INTO decrypt_null VALUES
|
||||
('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'),
|
||||
('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'),
|
||||
('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
dt,
|
||||
user_id,
|
||||
tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value
|
||||
FROM decrypt_null
|
||||
ORDER BY user_id ASC
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌──────────────────dt─┬─user_id─┬─value──┐
|
||||
│ 2022-08-02 00:00:00 │ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2022-09-02 00:00:00 │ 2 │ value2 │
|
||||
│ 2022-09-02 00:00:01 │ 3 │ ᴺᵁᴸᴸ │
|
||||
└─────────────────────┴─────────┴────────┘
|
||||
```
|
||||
|
||||
## aes_decrypt_mysql
|
||||
|
||||
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
|
||||
|
@ -624,6 +624,7 @@ ClickHouse поддерживает динамическое изменение
|
||||
- `http_proxy` - Настройка HTTP proxy для отсылки отчетов о сбоях.
|
||||
- `debug` - Настроить клиентскую библиотеку Sentry в debug режим.
|
||||
- `tmp_path` - Путь в файловой системе для временного хранения состояния отчетов о сбоях перед отправкой на сервер Sentry.
|
||||
- `environment` - Произвольное название среды, в которой запущен сервер ClickHouse, которое будет упомянуто в каждом отчете от сбое. По умолчанию имеет значение `test` или `prod` в зависимости от версии ClickHouse.
|
||||
|
||||
**Рекомендованные настройки**
|
||||
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <string>
|
||||
#include "Client.h"
|
||||
#include "Core/Protocol.h"
|
||||
#include "Parsers/formatAST.h"
|
||||
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
@ -514,6 +515,66 @@ static bool queryHasWithClause(const IAST & ast)
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<bool> Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query)
|
||||
{
|
||||
processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query);
|
||||
|
||||
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
|
||||
// Sometimes you may get TOO_DEEP_RECURSION from the server,
|
||||
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
|
||||
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
|
||||
{
|
||||
have_error = false;
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message());
|
||||
|
||||
// Try to reconnect after errors, for two reasons:
|
||||
// 1. We might not have realized that the server died, e.g. if
|
||||
// it sent us a <Fatal> trace and closed connection properly.
|
||||
// 2. The connection might have gotten into a wrong state and
|
||||
// the next query will get false positive about
|
||||
// "Unknown packet from server".
|
||||
try
|
||||
{
|
||||
connection->forceConnected(connection_parameters.timeouts);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Just report it, we'll terminate below.
|
||||
fmt::print(stderr,
|
||||
"Error while reconnecting to the server: {}\n",
|
||||
getCurrentExceptionMessage(true));
|
||||
|
||||
// The reconnection might fail, but we'll still be connected
|
||||
// in the sense of `connection->isConnected() = true`,
|
||||
// in case when the requested database doesn't exist.
|
||||
// Disconnect manually now, so that the following code doesn't
|
||||
// have any doubts, and the connection state is predictable.
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
if (!connection->isConnected())
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server.\n");
|
||||
|
||||
// Print the changed settings because they might be needed to
|
||||
// reproduce the error.
|
||||
printChangedSettings();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Returns false when server is not available.
|
||||
bool Client::processWithFuzzing(const String & full_query)
|
||||
@ -558,18 +619,33 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// - SET -- The time to fuzz the settings has not yet come
|
||||
// (see comments in Client/QueryFuzzer.cpp)
|
||||
size_t this_query_runs = query_fuzzer_runs;
|
||||
if (orig_ast->as<ASTInsertQuery>() ||
|
||||
orig_ast->as<ASTCreateQuery>() ||
|
||||
orig_ast->as<ASTDropQuery>() ||
|
||||
orig_ast->as<ASTSetQuery>())
|
||||
ASTs queries_for_fuzzed_tables;
|
||||
|
||||
if (orig_ast->as<ASTSetQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
}
|
||||
else if (const auto * create = orig_ast->as<ASTCreateQuery>())
|
||||
{
|
||||
if (QueryFuzzer::isSuitableForFuzzing(*create))
|
||||
this_query_runs = create_query_fuzzer_runs;
|
||||
else
|
||||
this_query_runs = 1;
|
||||
}
|
||||
else if (const auto * insert = orig_ast->as<ASTInsertQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query);
|
||||
}
|
||||
else if (const auto * drop = orig_ast->as<ASTDropQuery>())
|
||||
{
|
||||
this_query_runs = 1;
|
||||
queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop);
|
||||
}
|
||||
|
||||
String query_to_execute;
|
||||
ASTPtr parsed_query;
|
||||
|
||||
ASTPtr fuzz_base = orig_ast;
|
||||
|
||||
for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step)
|
||||
{
|
||||
fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs);
|
||||
@ -630,9 +706,9 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
continue;
|
||||
}
|
||||
|
||||
parsed_query = ast_to_process;
|
||||
query_to_execute = parsed_query->formatForErrorMessage();
|
||||
processParsedSingleQuery(full_query, query_to_execute, parsed_query);
|
||||
query_to_execute = ast_to_process->formatForErrorMessage();
|
||||
if (auto res = processFuzzingStep(query_to_execute, ast_to_process))
|
||||
return *res;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -645,60 +721,6 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
|
||||
// Sometimes you may get TOO_DEEP_RECURSION from the server,
|
||||
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
|
||||
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
|
||||
{
|
||||
have_error = false;
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message());
|
||||
|
||||
// Try to reconnect after errors, for two reasons:
|
||||
// 1. We might not have realized that the server died, e.g. if
|
||||
// it sent us a <Fatal> trace and closed connection properly.
|
||||
// 2. The connection might have gotten into a wrong state and
|
||||
// the next query will get false positive about
|
||||
// "Unknown packet from server".
|
||||
try
|
||||
{
|
||||
connection->forceConnected(connection_parameters.timeouts);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// Just report it, we'll terminate below.
|
||||
fmt::print(stderr,
|
||||
"Error while reconnecting to the server: {}\n",
|
||||
getCurrentExceptionMessage(true));
|
||||
|
||||
// The reconnection might fail, but we'll still be connected
|
||||
// in the sense of `connection->isConnected() = true`,
|
||||
// in case when the requested database doesn't exist.
|
||||
// Disconnect manually now, so that the following code doesn't
|
||||
// have any doubts, and the connection state is predictable.
|
||||
connection->disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
if (!connection->isConnected())
|
||||
{
|
||||
// Probably the server is dead because we found an assertion
|
||||
// failure. Fail fast.
|
||||
fmt::print(stderr, "Lost connection to the server.\n");
|
||||
|
||||
// Print the changed settings because they might be needed to
|
||||
// reproduce the error.
|
||||
printChangedSettings();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that after the query is formatted, we can parse it back,
|
||||
// format again and get the same result. Unfortunately, we can't
|
||||
// compare the ASTs, which would be more sensitive to errors. This
|
||||
@ -729,13 +751,12 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// query, but second and third.
|
||||
// If you have to add any more workarounds to this check, just remove
|
||||
// it altogether, it's not so useful.
|
||||
if (parsed_query && !have_error && !queryHasWithClause(*parsed_query))
|
||||
if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process))
|
||||
{
|
||||
ASTPtr ast_2;
|
||||
try
|
||||
{
|
||||
const auto * tmp_pos = query_to_execute.c_str();
|
||||
|
||||
ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */);
|
||||
}
|
||||
catch (Exception & e)
|
||||
@ -762,7 +783,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
"Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
|
||||
text_3, text_2);
|
||||
fmt::print(stderr, "In more detail:\n");
|
||||
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree());
|
||||
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree());
|
||||
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute);
|
||||
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree());
|
||||
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2);
|
||||
@ -784,6 +805,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// so that it doesn't influence the exit code.
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
fuzzer.notifyQueryFailed(ast_to_process);
|
||||
have_error = false;
|
||||
}
|
||||
else if (ast_to_process->formatForErrorMessage().size() > 500)
|
||||
@ -800,6 +822,35 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & query : queries_for_fuzzed_tables)
|
||||
{
|
||||
std::cout << std::endl;
|
||||
WriteBufferFromOStream ast_buf(std::cout, 4096);
|
||||
formatAST(*query, ast_buf, false /*highlight*/);
|
||||
ast_buf.next();
|
||||
std::cout << std::endl << std::endl;
|
||||
|
||||
try
|
||||
{
|
||||
query_to_execute = query->formatForErrorMessage();
|
||||
if (auto res = processFuzzingStep(query_to_execute, query))
|
||||
return *res;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
if (have_error)
|
||||
{
|
||||
server_exception.reset();
|
||||
client_exception.reset();
|
||||
fuzzer.notifyQueryFailed(query);
|
||||
have_error = false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -834,6 +885,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
|
||||
|
||||
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
|
||||
("create-query-fuzzer-runs", po::value<int>()->default_value(0), "")
|
||||
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
|
||||
"file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)")
|
||||
|
||||
@ -994,6 +1046,17 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
ignore_error = true;
|
||||
}
|
||||
|
||||
if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
|
||||
{
|
||||
// Fuzzer implies multiquery.
|
||||
config().setBool("multiquery", true);
|
||||
// Ignore errors in parsing queries.
|
||||
config().setBool("ignore-error", true);
|
||||
|
||||
global_context->setSetting("allow_suspicious_low_cardinality_types", true);
|
||||
ignore_error = true;
|
||||
}
|
||||
|
||||
if (options.count("opentelemetry-traceparent"))
|
||||
{
|
||||
String traceparent = options["opentelemetry-traceparent"].as<std::string>();
|
||||
|
@ -17,6 +17,7 @@ public:
|
||||
|
||||
protected:
|
||||
bool processWithFuzzing(const String & full_query) override;
|
||||
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);
|
||||
|
||||
void connect() override;
|
||||
|
||||
|
@ -79,7 +79,9 @@
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#if USE_BORINGSSL
|
||||
#include <Compression/CompressionCodecEncrypted.h>
|
||||
#endif
|
||||
#include <Server/MySQLHandlerFactory.h>
|
||||
#include <Server/PostgreSQLHandlerFactory.h>
|
||||
#include <Server/CertificateReloader.h>
|
||||
@ -1264,8 +1266,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
|
||||
global_context->updateStorageConfiguration(*config);
|
||||
global_context->updateInterserverCredentials(*config);
|
||||
|
||||
#if USE_BORINGSSL
|
||||
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
|
||||
#endif
|
||||
#if USE_SSL
|
||||
CertificateReloader::instance().tryLoad(*config);
|
||||
#endif
|
||||
@ -1418,8 +1421,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
|
||||
global_context,
|
||||
settings.async_insert_threads,
|
||||
settings.async_insert_max_data_size,
|
||||
AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms}));
|
||||
settings.async_insert_cleanup_timeout_ms));
|
||||
|
||||
/// Size of cache for marks (index of MergeTree family of tables).
|
||||
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
|
||||
@ -1471,9 +1473,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks);
|
||||
global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks);
|
||||
}
|
||||
|
||||
#if USE_BORINGSSL
|
||||
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
|
||||
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
|
||||
#endif
|
||||
|
||||
SCOPE_EXIT({
|
||||
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
|
||||
|
@ -32,7 +32,7 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
|
||||
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
|
||||
|
||||
/// Sizes can be non-equal due to skipped NaNs.
|
||||
const auto size = std::min(this->size_x, this->size_y);
|
||||
const Float64 size = static_cast<Float64>(std::min(this->size_x, this->size_y));
|
||||
|
||||
/// Count d^2 sum
|
||||
Float64 answer = 0;
|
||||
|
@ -247,7 +247,13 @@ add_object_library(clickhouse_access Access)
|
||||
add_object_library(clickhouse_backups Backups)
|
||||
add_object_library(clickhouse_core Core)
|
||||
add_object_library(clickhouse_core_mysql Core/MySQL)
|
||||
add_object_library(clickhouse_compression Compression)
|
||||
if (NOT ENABLE_EXTERNAL_OPENSSL)
|
||||
add_object_library(clickhouse_compression Compression)
|
||||
else ()
|
||||
add_headers_and_sources(dbms Compression)
|
||||
list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h)
|
||||
list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp)
|
||||
endif ()
|
||||
add_object_library(clickhouse_querypipeline QueryPipeline)
|
||||
add_object_library(clickhouse_datatypes DataTypes)
|
||||
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
|
||||
@ -368,8 +374,6 @@ target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st)
|
||||
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2)
|
||||
|
||||
target_link_libraries(clickhouse_common_io
|
||||
PRIVATE
|
||||
${EXECINFO_LIBRARIES}
|
||||
PUBLIC
|
||||
boost::program_options
|
||||
boost::system
|
||||
|
@ -251,6 +251,7 @@ protected:
|
||||
|
||||
QueryFuzzer fuzzer;
|
||||
int query_fuzzer_runs = 0;
|
||||
int create_query_fuzzer_runs = 0;
|
||||
|
||||
struct
|
||||
{
|
||||
|
@ -1,4 +1,22 @@
|
||||
#include "QueryFuzzer.h"
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Parsers/ASTColumnDeclaration.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Parsers/ParserDataType.h>
|
||||
#include <Parsers/ParserInsertQuery.h>
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
@ -430,6 +448,303 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
|
||||
}
|
||||
}
|
||||
|
||||
bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create)
|
||||
{
|
||||
return create.columns_list && create.columns_list->columns;
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
|
||||
{
|
||||
if (create.columns_list && create.columns_list->columns)
|
||||
{
|
||||
for (auto & ast : create.columns_list->columns->children)
|
||||
{
|
||||
if (auto * column = ast->as<ASTColumnDeclaration>())
|
||||
{
|
||||
fuzzColumnDeclaration(*column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (create.storage && create.storage->engine)
|
||||
{
|
||||
/// Replace ReplicatedMergeTree to ordinary MergeTree
|
||||
/// to avoid inconsistency of metadata in zookeeper.
|
||||
auto & engine_name = create.storage->engine->name;
|
||||
if (startsWith(engine_name, "Replicated"))
|
||||
{
|
||||
engine_name = engine_name.substr(strlen("Replicated"));
|
||||
if (auto & arguments = create.storage->engine->arguments)
|
||||
{
|
||||
auto & children = arguments->children;
|
||||
if (children.size() <= 2)
|
||||
arguments.reset();
|
||||
else
|
||||
children.erase(children.begin(), children.begin() + 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto full_name = create.getTable();
|
||||
auto original_name = full_name.substr(0, full_name.find("__fuzz_"));
|
||||
|
||||
size_t index = index_of_fuzzed_table[original_name]++;
|
||||
auto new_name = original_name + "__fuzz_" + toString(index);
|
||||
|
||||
create.setTable(new_name);
|
||||
|
||||
SipHash sip_hash;
|
||||
sip_hash.update(original_name);
|
||||
if (create.columns_list)
|
||||
create.columns_list->updateTreeHash(sip_hash);
|
||||
if (create.storage)
|
||||
create.storage->updateTreeHash(sip_hash);
|
||||
|
||||
IAST::Hash hash;
|
||||
sip_hash.get128(hash);
|
||||
|
||||
/// Save only tables with unique definition.
|
||||
if (created_tables_hashes.insert(hash).second)
|
||||
original_table_name_to_fuzzed[original_name].insert(new_name);
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column)
|
||||
{
|
||||
if (column.type)
|
||||
{
|
||||
auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type));
|
||||
|
||||
ParserDataType parser;
|
||||
column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
}
|
||||
}
|
||||
|
||||
DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
|
||||
{
|
||||
/// Do not replace Array/Tuple/etc. with not Array/Tuple too often.
|
||||
const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
|
||||
if (type_array && fuzz_rand() % 4 != 0)
|
||||
return std::make_shared<DataTypeArray>(fuzzDataType(type_array->getNestedType()));
|
||||
|
||||
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
|
||||
if (type_tuple && fuzz_rand() % 4 != 0)
|
||||
{
|
||||
DataTypes elements;
|
||||
for (const auto & element : type_tuple->getElements())
|
||||
elements.push_back(fuzzDataType(element));
|
||||
|
||||
return type_tuple->haveExplicitNames()
|
||||
? std::make_shared<DataTypeTuple>(elements, type_tuple->getElementNames())
|
||||
: std::make_shared<DataTypeTuple>(elements);
|
||||
}
|
||||
|
||||
const auto * type_map = typeid_cast<const DataTypeMap *>(type.get());
|
||||
if (type_map && fuzz_rand() % 4 != 0)
|
||||
{
|
||||
auto key_type = fuzzDataType(type_map->getKeyType());
|
||||
auto value_type = fuzzDataType(type_map->getValueType());
|
||||
if (!DataTypeMap::checkKeyType(key_type))
|
||||
key_type = type_map->getKeyType();
|
||||
|
||||
return std::make_shared<DataTypeMap>(key_type, value_type);
|
||||
}
|
||||
|
||||
const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get());
|
||||
if (type_nullable)
|
||||
{
|
||||
size_t tmp = fuzz_rand() % 3;
|
||||
if (tmp == 0)
|
||||
return fuzzDataType(type_nullable->getNestedType());
|
||||
|
||||
if (tmp == 1)
|
||||
{
|
||||
auto nested_type = fuzzDataType(type_nullable->getNestedType());
|
||||
if (nested_type->canBeInsideNullable())
|
||||
return std::make_shared<DataTypeNullable>(nested_type);
|
||||
}
|
||||
}
|
||||
|
||||
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get());
|
||||
if (type_low_cardinality)
|
||||
{
|
||||
size_t tmp = fuzz_rand() % 3;
|
||||
if (tmp == 0)
|
||||
return fuzzDataType(type_low_cardinality->getDictionaryType());
|
||||
|
||||
if (tmp == 1)
|
||||
{
|
||||
auto nested_type = fuzzDataType(type_low_cardinality->getDictionaryType());
|
||||
if (nested_type->canBeInsideLowCardinality())
|
||||
return std::make_shared<DataTypeLowCardinality>(nested_type);
|
||||
}
|
||||
}
|
||||
|
||||
size_t tmp = fuzz_rand() % 8;
|
||||
if (tmp == 0)
|
||||
return std::make_shared<DataTypeArray>(type);
|
||||
|
||||
if (tmp <= 1 && type->canBeInsideNullable())
|
||||
return std::make_shared<DataTypeNullable>(type);
|
||||
|
||||
if (tmp <= 2 && type->canBeInsideLowCardinality())
|
||||
return std::make_shared<DataTypeLowCardinality>(type);
|
||||
|
||||
if (tmp <= 3)
|
||||
return getRandomType();
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
DataTypePtr QueryFuzzer::getRandomType()
|
||||
{
|
||||
auto type_id = static_cast<TypeIndex>(fuzz_rand() % static_cast<size_t>(TypeIndex::Tuple) + 1);
|
||||
|
||||
if (type_id == TypeIndex::Tuple)
|
||||
{
|
||||
size_t tuple_size = fuzz_rand() % 6 + 1;
|
||||
DataTypes elements;
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
elements.push_back(getRandomType());
|
||||
return std::make_shared<DataTypeTuple>(elements);
|
||||
}
|
||||
|
||||
if (type_id == TypeIndex::Array)
|
||||
return std::make_shared<DataTypeArray>(getRandomType());
|
||||
|
||||
/// NOLINTBEGIN(bugprone-macro-parentheses)
|
||||
#define DISPATCH(DECIMAL) \
|
||||
if (type_id == TypeIndex::DECIMAL) \
|
||||
return std::make_shared<DataTypeDecimal<DECIMAL>>( \
|
||||
DataTypeDecimal<DECIMAL>::maxPrecision(), \
|
||||
(fuzz_rand() % DataTypeDecimal<DECIMAL>::maxPrecision()) + 1);
|
||||
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
DISPATCH(Decimal256)
|
||||
#undef DISPATCH
|
||||
/// NOLINTEND(bugprone-macro-parentheses)
|
||||
|
||||
if (type_id == TypeIndex::FixedString)
|
||||
return std::make_shared<DataTypeFixedString>(fuzz_rand() % 20);
|
||||
|
||||
if (type_id == TypeIndex::Enum8)
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
|
||||
if (type_id == TypeIndex::Enum16)
|
||||
return std::make_shared<DataTypeUInt16>();
|
||||
|
||||
return DataTypeFactory::instance().get(String(magic_enum::enum_name(type_id)));
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
|
||||
{
|
||||
if (!table.database_and_table_name || fuzz_rand() % 3 == 0)
|
||||
return;
|
||||
|
||||
const auto * identifier = table.database_and_table_name->as<ASTTableIdentifier>();
|
||||
if (!identifier)
|
||||
return;
|
||||
|
||||
auto table_id = identifier->getTableId();
|
||||
if (table_id.empty())
|
||||
return;
|
||||
|
||||
auto it = original_table_name_to_fuzzed.find(table_id.getTableName());
|
||||
if (it != original_table_name_to_fuzzed.end() && !it->second.empty())
|
||||
{
|
||||
auto new_table_name = it->second.begin();
|
||||
std::advance(new_table_name, fuzz_rand() % it->second.size());
|
||||
StorageID new_table_id(table_id.database_name, *new_table_name);
|
||||
table.database_and_table_name = std::make_shared<ASTTableIdentifier>(new_table_id);
|
||||
}
|
||||
}
|
||||
|
||||
static ASTPtr tryParseInsertQuery(const String & full_query)
|
||||
{
|
||||
const char * pos = full_query.data();
|
||||
const char * end = full_query.data() + full_query.size();
|
||||
|
||||
ParserInsertQuery parser(end, false);
|
||||
String message;
|
||||
|
||||
return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
}
|
||||
|
||||
ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query)
|
||||
{
|
||||
auto parsed_query = tryParseInsertQuery(full_query);
|
||||
if (!parsed_query)
|
||||
return {};
|
||||
|
||||
const auto & insert = *parsed_query->as<ASTInsertQuery>();
|
||||
if (!insert.table)
|
||||
return {};
|
||||
|
||||
auto table_name = insert.getTable();
|
||||
auto it = original_table_name_to_fuzzed.find(table_name);
|
||||
if (it == original_table_name_to_fuzzed.end())
|
||||
return {};
|
||||
|
||||
ASTs queries;
|
||||
for (const auto & fuzzed_name : it->second)
|
||||
{
|
||||
/// Parse query from scratch for each table instead of clone,
|
||||
/// to store proper pointers to inlined data,
|
||||
/// which are not copied during clone.
|
||||
auto & query = queries.emplace_back(tryParseInsertQuery(full_query));
|
||||
query->as<ASTInsertQuery>()->setTable(fuzzed_name);
|
||||
}
|
||||
|
||||
return queries;
|
||||
}
|
||||
|
||||
ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
|
||||
{
|
||||
if (drop_query.kind != ASTDropQuery::Drop)
|
||||
return {};
|
||||
|
||||
auto table_name = drop_query.getTable();
|
||||
auto it = index_of_fuzzed_table.find(table_name);
|
||||
if (it == index_of_fuzzed_table.end())
|
||||
return {};
|
||||
|
||||
ASTs queries;
|
||||
/// Drop all created tables, not only unique ones.
|
||||
for (size_t i = 0; i < it->second; ++i)
|
||||
{
|
||||
auto fuzzed_name = table_name + "__fuzz_" + toString(i);
|
||||
auto & query = queries.emplace_back(drop_query.clone());
|
||||
query->as<ASTDropQuery>()->setTable(fuzzed_name);
|
||||
/// Just in case add IF EXISTS to avoid exceptions.
|
||||
query->as<ASTDropQuery>()->if_exists = true;
|
||||
}
|
||||
|
||||
index_of_fuzzed_table.erase(it);
|
||||
original_table_name_to_fuzzed.erase(table_name);
|
||||
|
||||
return queries;
|
||||
}
|
||||
|
||||
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
|
||||
{
|
||||
auto remove_fuzzed_table = [this](const auto & table_name)
|
||||
{
|
||||
auto pos = table_name.find("__fuzz_");
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
auto original_name = table_name.substr(0, pos);
|
||||
original_table_name_to_fuzzed[original_name].erase(table_name);
|
||||
}
|
||||
};
|
||||
|
||||
if (const auto * create = ast->as<ASTCreateQuery>())
|
||||
remove_fuzzed_table(create->getTable());
|
||||
|
||||
if (const auto * insert = ast->as<ASTInsertQuery>())
|
||||
remove_fuzzed_table(insert->getTable());
|
||||
}
|
||||
|
||||
void QueryFuzzer::fuzz(ASTs & asts)
|
||||
{
|
||||
for (auto & ast : asts)
|
||||
@ -497,6 +812,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
|
||||
}
|
||||
else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
|
||||
{
|
||||
fuzzTableName(*table_expr);
|
||||
fuzz(table_expr->children);
|
||||
}
|
||||
else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
|
||||
@ -563,6 +879,10 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
|
||||
literal->value = fuzzField(literal->value);
|
||||
}
|
||||
}
|
||||
else if (auto * create_query = typeid_cast<ASTCreateQuery *>(ast.get()))
|
||||
{
|
||||
fuzzCreateQuery(*create_query);
|
||||
}
|
||||
else
|
||||
{
|
||||
fuzz(ast->children);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@ -16,6 +17,11 @@ namespace DB
|
||||
|
||||
class ASTExpressionList;
|
||||
class ASTOrderByElement;
|
||||
class ASTCreateQuery;
|
||||
class ASTInsertQuery;
|
||||
class ASTColumnDeclaration;
|
||||
class ASTDropQuery;
|
||||
struct ASTTableExpression;
|
||||
struct ASTWindowDefinition;
|
||||
|
||||
/*
|
||||
@ -54,6 +60,9 @@ struct QueryFuzzer
|
||||
std::unordered_set<const IAST *> debug_visited_nodes;
|
||||
ASTPtr * debug_top_ast = nullptr;
|
||||
|
||||
std::unordered_map<std::string, std::unordered_set<std::string>> original_table_name_to_fuzzed;
|
||||
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
|
||||
std::set<IAST::Hash> created_tables_hashes;
|
||||
|
||||
// This is the only function you have to call -- it will modify the passed
|
||||
// ASTPtr to point to new AST with some random changes.
|
||||
@ -63,18 +72,28 @@ struct QueryFuzzer
|
||||
Field getRandomField(int type);
|
||||
Field fuzzField(Field field);
|
||||
ASTPtr getRandomColumnLike();
|
||||
DataTypePtr fuzzDataType(DataTypePtr type);
|
||||
DataTypePtr getRandomType();
|
||||
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
|
||||
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
|
||||
void notifyQueryFailed(ASTPtr ast);
|
||||
void replaceWithColumnLike(ASTPtr & ast);
|
||||
void replaceWithTableLike(ASTPtr & ast);
|
||||
void fuzzOrderByElement(ASTOrderByElement * elem);
|
||||
void fuzzOrderByList(IAST * ast);
|
||||
void fuzzColumnLikeExpressionList(IAST * ast);
|
||||
void fuzzWindowFrame(ASTWindowDefinition & def);
|
||||
void fuzzCreateQuery(ASTCreateQuery & create);
|
||||
void fuzzColumnDeclaration(ASTColumnDeclaration & column);
|
||||
void fuzzTableName(ASTTableExpression & table);
|
||||
void fuzz(ASTs & asts);
|
||||
void fuzz(ASTPtr & ast);
|
||||
void collectFuzzInfoMain(ASTPtr ast);
|
||||
void addTableLike(ASTPtr ast);
|
||||
void addColumnLike(ASTPtr ast);
|
||||
void collectFuzzInfoRecurse(ASTPtr ast);
|
||||
|
||||
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -176,7 +176,9 @@ void registerCodecDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecT64(CompressionCodecFactory & factory);
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
#if USE_BORINGSSL
|
||||
void registerCodecEncrypted(CompressionCodecFactory & factory);
|
||||
#endif
|
||||
void registerCodecFPC(CompressionCodecFactory & factory);
|
||||
|
||||
#endif
|
||||
@ -193,7 +195,9 @@ CompressionCodecFactory::CompressionCodecFactory()
|
||||
registerCodecT64(*this);
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
#if USE_BORINGSSL
|
||||
registerCodecEncrypted(*this);
|
||||
#endif
|
||||
registerCodecFPC(*this);
|
||||
#ifdef ENABLE_QPL_COMPRESSION
|
||||
registerCodecDeflateQpl(*this);
|
||||
|
@ -596,9 +596,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
|
||||
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
|
||||
M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
|
||||
M(UInt64, async_insert_max_data_size, 100000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
|
||||
M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
|
||||
M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \
|
||||
M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \
|
||||
M(Milliseconds, async_insert_cleanup_timeout_ms, 1000, "Time to wait before each iteration of cleaning up buffers for INSERT queries which don't appear anymore. Only has meaning at server startup.", 0) \
|
||||
\
|
||||
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
|
||||
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
|
||||
@ -671,6 +671,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_database_atomic, true) \
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \
|
||||
MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \
|
||||
MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \
|
||||
MAKE_OBSOLETE(M, HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT) \
|
||||
MAKE_OBSOLETE(M, Bool, database_replicated_ddl_output, true) \
|
||||
MAKE_OBSOLETE(M, UInt64, replication_alter_columns_timeout, 60) \
|
||||
|
@ -22,3 +22,4 @@
|
||||
#cmakedefine01 USE_ODBC
|
||||
#cmakedefine01 USE_REPLXX
|
||||
#cmakedefine01 USE_JEMALLOC
|
||||
#cmakedefine01 USE_BORINGSSL
|
||||
|
@ -11,7 +11,7 @@ if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
|
||||
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
|
||||
endif()
|
||||
|
||||
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config ${EXECINFO_LIBRARIES})
|
||||
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config)
|
||||
|
||||
if (TARGET ch_contrib::sentry)
|
||||
target_link_libraries (daemon PRIVATE ch_contrib::sentry dbms)
|
||||
|
@ -96,14 +96,14 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
|
||||
}
|
||||
sentry_options_set_dsn(options, endpoint.c_str());
|
||||
sentry_options_set_database_path(options, temp_folder_path.c_str());
|
||||
|
||||
/// This value will be attached to each report
|
||||
String environment_default_value = "test";
|
||||
if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts"))
|
||||
{
|
||||
sentry_options_set_environment(options, "prod");
|
||||
}
|
||||
else
|
||||
{
|
||||
sentry_options_set_environment(options, "test");
|
||||
}
|
||||
environment_default_value = "prod";
|
||||
/// If the value is set in config - use it
|
||||
auto value = config.getString("send_crash_reports.environment", environment_default_value);
|
||||
sentry_options_set_environment(options, value.c_str());
|
||||
|
||||
const std::string & http_proxy = config.getString("send_crash_reports.http_proxy", "");
|
||||
if (!http_proxy.empty())
|
||||
|
@ -205,10 +205,9 @@ inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType &
|
||||
if (!std::isfinite(value))
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Cannot convert infinity or NaN to decimal",
|
||||
ErrorCodes::DECIMAL_OVERFLOW);
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name);
|
||||
else
|
||||
return false;
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
||||
auto out = value * static_cast<FromFieldType>(DecimalUtils::scaleMultiplier<ToNativeType>(scale));
|
||||
@ -217,8 +216,7 @@ inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType &
|
||||
out >= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::max()))
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
|
||||
ErrorCodes::DECIMAL_OVERFLOW);
|
||||
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name);
|
||||
else
|
||||
return ReturnType(false);
|
||||
}
|
||||
|
@ -65,38 +65,61 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function<void()> func,
|
||||
void WriteBufferFromAzureBlobStorage::finalizeImpl()
|
||||
{
|
||||
execWithRetry([this](){ next(); }, DEFAULT_RETRY_NUM);
|
||||
|
||||
if (tmp_buffer_write_offset > 0)
|
||||
uploadBlock(tmp_buffer->data(), tmp_buffer_write_offset);
|
||||
|
||||
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
|
||||
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM);
|
||||
|
||||
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
|
||||
}
|
||||
|
||||
void WriteBufferFromAzureBlobStorage::uploadBlock(const char * data, size_t size)
|
||||
{
|
||||
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
|
||||
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
|
||||
|
||||
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(data), size);
|
||||
execWithRetry([&](){ block_blob_client.StageBlock(block_id, memory_stream); }, DEFAULT_RETRY_NUM);
|
||||
tmp_buffer_write_offset = 0;
|
||||
|
||||
LOG_TRACE(log, "Staged block (id: {}) of size {} (blob path: {}).", block_id, size, blob_path);
|
||||
}
|
||||
|
||||
WriteBufferFromAzureBlobStorage::MemoryBufferPtr WriteBufferFromAzureBlobStorage::allocateBuffer() const
|
||||
{
|
||||
return std::make_unique<Memory<>>(max_single_part_upload_size);
|
||||
}
|
||||
|
||||
void WriteBufferFromAzureBlobStorage::nextImpl()
|
||||
{
|
||||
if (!offset())
|
||||
size_t size_to_upload = offset();
|
||||
|
||||
if (size_to_upload == 0)
|
||||
return;
|
||||
|
||||
char * buffer_begin = working_buffer.begin();
|
||||
size_t total_size = offset();
|
||||
if (!tmp_buffer)
|
||||
tmp_buffer = allocateBuffer();
|
||||
|
||||
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
|
||||
|
||||
size_t current_size = 0;
|
||||
std::vector<std::string> block_ids;
|
||||
|
||||
while (current_size < total_size)
|
||||
size_t uploaded_size = 0;
|
||||
while (uploaded_size != size_to_upload)
|
||||
{
|
||||
size_t part_len = std::min(total_size - current_size, max_single_part_upload_size);
|
||||
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
|
||||
size_t memory_buffer_remaining_size = max_single_part_upload_size - tmp_buffer_write_offset;
|
||||
if (memory_buffer_remaining_size == 0)
|
||||
uploadBlock(tmp_buffer->data(), tmp_buffer->size());
|
||||
|
||||
Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast<uint8_t *>(buffer_begin + current_size), part_len);
|
||||
execWithRetry([&](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, DEFAULT_RETRY_NUM);
|
||||
|
||||
current_size += part_len;
|
||||
LOG_TRACE(log, "Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path);
|
||||
size_t size = std::min(memory_buffer_remaining_size, size_to_upload - uploaded_size);
|
||||
memcpy(tmp_buffer->data() + tmp_buffer_write_offset, working_buffer.begin() + uploaded_size, size);
|
||||
uploaded_size += size;
|
||||
tmp_buffer_write_offset += size;
|
||||
}
|
||||
|
||||
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM);
|
||||
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
|
||||
if (tmp_buffer_write_offset == max_single_part_upload_size)
|
||||
uploadBlock(tmp_buffer->data(), tmp_buffer->size());
|
||||
|
||||
if (write_settings.remote_throttler)
|
||||
write_settings.remote_throttler->add(total_size);
|
||||
write_settings.remote_throttler->add(size_to_upload);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ public:
|
||||
private:
|
||||
void finalizeImpl() override;
|
||||
void execWithRetry(std::function<void()> func, size_t num_tries);
|
||||
void uploadBlock(const char * data, size_t size);
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
@ -48,6 +49,13 @@ private:
|
||||
const WriteSettings write_settings;
|
||||
|
||||
AzureClientPtr blob_container_client;
|
||||
std::vector<std::string> block_ids;
|
||||
|
||||
using MemoryBufferPtr = std::unique_ptr<Memory<>>;
|
||||
MemoryBufferPtr tmp_buffer;
|
||||
size_t tmp_buffer_write_offset = 0;
|
||||
|
||||
MemoryBufferPtr allocateBuffer() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Functions/FunctionsAES.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#if USE_SSL
|
||||
|
||||
@ -8,7 +9,6 @@
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
|
@ -1,6 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
#if USE_SSL
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
@ -20,7 +23,6 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -411,6 +413,7 @@ class FunctionDecrypt : public IFunction
|
||||
public:
|
||||
static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode;
|
||||
static constexpr auto name = Impl::name;
|
||||
static constexpr bool use_null_when_decrypt_fail = Impl::use_null_when_decrypt_fail;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDecrypt>(); }
|
||||
|
||||
private:
|
||||
@ -445,6 +448,9 @@ private:
|
||||
optional_args
|
||||
);
|
||||
|
||||
if constexpr (use_null_when_decrypt_fail)
|
||||
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
@ -468,7 +474,7 @@ private:
|
||||
ColumnPtr result_column;
|
||||
if (arguments.size() <= 3)
|
||||
{
|
||||
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr);
|
||||
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -478,7 +484,7 @@ private:
|
||||
|
||||
if (arguments.size() <= 4)
|
||||
{
|
||||
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr);
|
||||
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -486,13 +492,13 @@ private:
|
||||
throw Exception("AAD can be only set for GCM-mode", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
const auto aad_column = arguments[4].column;
|
||||
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
}
|
||||
}
|
||||
|
||||
return result_column;
|
||||
}
|
||||
|
||||
template<bool use_null_when_decrypt_fail>
|
||||
static ColumnPtr doDecrypt(
|
||||
const EVP_CIPHER * evp_cipher,
|
||||
size_t input_rows_count,
|
||||
@ -503,25 +509,25 @@ private:
|
||||
{
|
||||
if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::MySQL)
|
||||
{
|
||||
return doDecryptImpl<CipherMode::MySQLCompatibility>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
return doDecryptImpl<CipherMode::MySQLCompatibility, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto cipher_mode = EVP_CIPHER_mode(evp_cipher);
|
||||
if (cipher_mode == EVP_CIPH_GCM_MODE)
|
||||
{
|
||||
return doDecryptImpl<CipherMode::RFC5116_AEAD_AES_GCM>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
return doDecryptImpl<CipherMode::RFC5116_AEAD_AES_GCM, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
return doDecryptImpl<CipherMode::OpenSSLCompatibility>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
return doDecryptImpl<CipherMode::OpenSSLCompatibility, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <CipherMode mode>
|
||||
template <CipherMode mode, bool use_null_when_decrypt_fail>
|
||||
static ColumnPtr doDecryptImpl(const EVP_CIPHER * evp_cipher,
|
||||
size_t input_rows_count,
|
||||
const ColumnPtr & input_column,
|
||||
@ -541,6 +547,7 @@ private:
|
||||
static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
|
||||
|
||||
auto decrypted_result_column = ColumnString::create();
|
||||
auto null_map = ColumnUInt8::create();
|
||||
auto & decrypted_result_column_data = decrypted_result_column->getChars();
|
||||
auto & decrypted_result_column_offsets = decrypted_result_column->getOffsets();
|
||||
|
||||
@ -616,6 +623,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
bool decrypt_fail = false;
|
||||
/// Avoid extra work on empty ciphertext/plaintext. Always decrypt empty to empty.
|
||||
/// This makes sense for default implementation for NULLs.
|
||||
if (input_value.size > 0)
|
||||
@ -662,9 +670,14 @@ private:
|
||||
if (EVP_DecryptUpdate(evp_ctx,
|
||||
reinterpret_cast<unsigned char*>(decrypted), &output_len,
|
||||
reinterpret_cast<const unsigned char*>(input_value.data), static_cast<int>(input_value.size)) != 1)
|
||||
{
|
||||
if constexpr (!use_null_when_decrypt_fail)
|
||||
onError("Failed to decrypt");
|
||||
decrypt_fail = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
decrypted += output_len;
|
||||
|
||||
// 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context
|
||||
if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
|
||||
{
|
||||
@ -674,16 +687,29 @@ private:
|
||||
}
|
||||
|
||||
// 4: retrieve encrypted data (ciphertext)
|
||||
if (EVP_DecryptFinal_ex(evp_ctx,
|
||||
if (!decrypt_fail && EVP_DecryptFinal_ex(evp_ctx,
|
||||
reinterpret_cast<unsigned char*>(decrypted), &output_len) != 1)
|
||||
{
|
||||
if constexpr (!use_null_when_decrypt_fail)
|
||||
onError("Failed to decrypt");
|
||||
decrypt_fail = true;
|
||||
}
|
||||
else
|
||||
decrypted += output_len;
|
||||
}
|
||||
}
|
||||
|
||||
*decrypted = '\0';
|
||||
++decrypted;
|
||||
|
||||
decrypted_result_column_offsets.push_back(decrypted - decrypted_result_column_data.data());
|
||||
if constexpr (use_null_when_decrypt_fail)
|
||||
{
|
||||
if (decrypt_fail)
|
||||
null_map->insertValue(1);
|
||||
else
|
||||
null_map->insertValue(0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -694,6 +720,9 @@ private:
|
||||
}
|
||||
|
||||
decrypted_result_column->validate();
|
||||
if constexpr (use_null_when_decrypt_fail)
|
||||
return ColumnNullable::create(std::move(decrypted_result_column), std::move(null_map));
|
||||
else
|
||||
return decrypted_result_column;
|
||||
}
|
||||
};
|
||||
|
@ -323,13 +323,13 @@ struct ToDateTimeImpl
|
||||
{
|
||||
static constexpr auto name = "toDateTime";
|
||||
|
||||
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
|
||||
return date_time <= 0xffffffff ? UInt32(date_time) : UInt32(0xffffffff);
|
||||
}
|
||||
|
||||
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
if (d < 0)
|
||||
return 0;
|
||||
@ -338,12 +338,12 @@ struct ToDateTimeImpl
|
||||
return date_time <= 0xffffffff ? date_time : 0xffffffff;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/)
|
||||
static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/)
|
||||
{
|
||||
return dt;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(Int64 d, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(Int64 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
if (d < 0)
|
||||
return 0;
|
||||
@ -352,7 +352,7 @@ struct ToDateTimeImpl
|
||||
return date_time <= 0xffffffff ? date_time : 0xffffffff;
|
||||
}
|
||||
|
||||
static inline UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & /*time_zone*/)
|
||||
static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & /*time_zone*/)
|
||||
{
|
||||
if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
|
||||
return 0;
|
||||
@ -374,7 +374,7 @@ struct ToDateTransform32Or64
|
||||
{
|
||||
static constexpr auto name = "toDate";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
{
|
||||
// since converting to Date, no need in values outside of default LUT range.
|
||||
if (from < 0)
|
||||
@ -391,7 +391,7 @@ struct ToDateTransform32Or64Signed
|
||||
{
|
||||
static constexpr auto name = "toDate";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
{
|
||||
// TODO: decide narrow or extended range based on FromType
|
||||
/// The function should be monotonic (better for query optimizations), so we saturate instead of overflow.
|
||||
@ -413,7 +413,7 @@ struct ToDateTransform8Or16Signed
|
||||
{
|
||||
static constexpr auto name = "toDate";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
{
|
||||
if (from < 0)
|
||||
return 0;
|
||||
@ -431,7 +431,7 @@ struct ToDate32Transform32Or64
|
||||
{
|
||||
static constexpr auto name = "toDate32";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return (from < DATE_LUT_MAX_EXTEND_DAY_NUM)
|
||||
? from
|
||||
@ -444,7 +444,7 @@ struct ToDate32Transform32Or64Signed
|
||||
{
|
||||
static constexpr auto name = "toDate32";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
|
||||
{
|
||||
static const Int32 daynum_min_offset = -static_cast<Int32>(DateLUT::instance().getDayNumOffsetEpoch());
|
||||
if (from < daynum_min_offset)
|
||||
@ -460,7 +460,7 @@ struct ToDate32Transform8Or16Signed
|
||||
{
|
||||
static constexpr auto name = "toDate32";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
{
|
||||
return from;
|
||||
}
|
||||
@ -529,7 +529,7 @@ struct ToDateTimeTransform64
|
||||
{
|
||||
static constexpr auto name = "toDateTime";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
{
|
||||
return std::min<Int64>(Int64(from), Int64(0xFFFFFFFF));
|
||||
}
|
||||
@ -540,7 +540,7 @@ struct ToDateTimeTransformSigned
|
||||
{
|
||||
static constexpr auto name = "toDateTime";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
|
||||
{
|
||||
if (from < 0)
|
||||
return 0;
|
||||
@ -553,7 +553,7 @@ struct ToDateTimeTransform64Signed
|
||||
{
|
||||
static constexpr auto name = "toDateTime";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & /* time_zone */)
|
||||
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & /* time_zone */)
|
||||
{
|
||||
if (from < 0)
|
||||
return 0;
|
||||
@ -581,9 +581,9 @@ template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime, N
|
||||
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime, Name>
|
||||
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime, ToDateTimeTransform64Signed<Float64, UInt32>> {};
|
||||
|
||||
const time_t LUT_MIN_TIME = -2208988800l; // 1900-01-01 UTC
|
||||
constexpr time_t LUT_MIN_TIME = -2208988800l; // 1900-01-01 UTC
|
||||
|
||||
const time_t LUT_MAX_TIME = 10413791999l; // 2299-12-31 UTC
|
||||
constexpr time_t LUT_MAX_TIME = 10413791999l; // 2299-12-31 UTC
|
||||
|
||||
/** Conversion of numeric to DateTime64
|
||||
*/
|
||||
@ -599,7 +599,7 @@ struct ToDateTime64TransformUnsigned
|
||||
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
|
||||
{}
|
||||
|
||||
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
{
|
||||
from = std::min<time_t>(from, LUT_MAX_TIME);
|
||||
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
|
||||
@ -616,7 +616,7 @@ struct ToDateTime64TransformSigned
|
||||
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
|
||||
{}
|
||||
|
||||
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
{
|
||||
from = std::max<time_t>(from, LUT_MIN_TIME);
|
||||
from = std::min<time_t>(from, LUT_MAX_TIME);
|
||||
@ -634,11 +634,10 @@ struct ToDateTime64TransformFloat
|
||||
: scale(scale_)
|
||||
{}
|
||||
|
||||
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
|
||||
{
|
||||
if (from < 0)
|
||||
return 0;
|
||||
from = std::min<FromType>(from, FromType(0xFFFFFFFF));
|
||||
from = std::max(from, static_cast<FromType>(LUT_MIN_TIME));
|
||||
from = std::min(from, static_cast<FromType>(LUT_MAX_TIME));
|
||||
return convertToDecimal<FromDataType, DataTypeDateTime64>(from, scale);
|
||||
}
|
||||
};
|
||||
@ -672,7 +671,7 @@ struct FromDateTime64Transform
|
||||
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
|
||||
{}
|
||||
|
||||
inline auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const
|
||||
auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const
|
||||
{
|
||||
const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier);
|
||||
return Transform::execute(static_cast<UInt32>(c.whole), time_zone);
|
||||
@ -694,19 +693,19 @@ struct ToDateTime64Transform
|
||||
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
|
||||
{}
|
||||
|
||||
inline DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
|
||||
DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
|
||||
{
|
||||
const auto dt = ToDateTimeImpl::execute(d, time_zone);
|
||||
return execute(dt, time_zone);
|
||||
}
|
||||
|
||||
inline DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const
|
||||
DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const
|
||||
{
|
||||
const auto dt = time_zone.fromDayNum(ExtendedDayNum(d));
|
||||
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
|
||||
}
|
||||
|
||||
inline DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
|
||||
DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
|
||||
{
|
||||
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ struct DecryptMySQLModeImpl
|
||||
{
|
||||
static constexpr auto name = "aes_decrypt_mysql";
|
||||
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::MySQL;
|
||||
static constexpr bool use_null_when_decrypt_fail = false;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ struct DecryptImpl
|
||||
{
|
||||
static constexpr auto name = "decrypt";
|
||||
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL;
|
||||
static constexpr bool use_null_when_decrypt_fail = false;
|
||||
};
|
||||
|
||||
}
|
||||
|
32
src/Functions/tryDecrypt.cpp
Normal file
32
src/Functions/tryDecrypt.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
#include <Common/Documentation.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_SSL
|
||||
|
||||
# include <Functions/FunctionFactory.h>
|
||||
# include <Functions/FunctionsAES.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct TryDecryptImpl
|
||||
{
|
||||
static constexpr auto name = "tryDecrypt";
|
||||
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL;
|
||||
static constexpr bool use_null_when_decrypt_fail = true;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
REGISTER_FUNCTION(TryDecrypt)
|
||||
{
|
||||
factory.registerFunction<FunctionDecrypt<TryDecryptImpl>>(Documentation(
|
||||
"Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key."));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -120,11 +120,9 @@ std::exception_ptr AsynchronousInsertQueue::InsertData::Entry::getException() co
|
||||
}
|
||||
|
||||
|
||||
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size_, const Timeout & timeouts)
|
||||
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout_)
|
||||
: WithContext(context_)
|
||||
, max_data_size(max_data_size_)
|
||||
, busy_timeout(timeouts.busy)
|
||||
, stale_timeout(timeouts.stale)
|
||||
, cleanup_timeout(cleanup_timeout_)
|
||||
, pool(pool_size)
|
||||
, dump_by_first_update_thread(&AsynchronousInsertQueue::busyCheck, this)
|
||||
, cleanup_thread(&AsynchronousInsertQueue::cleanup, this)
|
||||
@ -132,9 +130,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
|
||||
using namespace std::chrono;
|
||||
|
||||
assert(pool_size);
|
||||
|
||||
if (stale_timeout > 0ms)
|
||||
dump_by_last_update_thread = ThreadFromGlobalPool(&AsynchronousInsertQueue::staleCheck, this);
|
||||
}
|
||||
|
||||
AsynchronousInsertQueue::~AsynchronousInsertQueue()
|
||||
@ -143,10 +138,14 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
|
||||
|
||||
LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
|
||||
|
||||
{
|
||||
std::lock_guard lock(shutdown_mutex);
|
||||
shutdown = true;
|
||||
shutdown_cv.notify_all();
|
||||
{
|
||||
std::lock_guard lock(deadline_mutex);
|
||||
are_tasks_available.notify_one();
|
||||
}
|
||||
{
|
||||
std::lock_guard lock(cleanup_mutex);
|
||||
cleanup_can_run.notify_one();
|
||||
}
|
||||
|
||||
assert(dump_by_first_update_thread.joinable());
|
||||
@ -155,9 +154,6 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
|
||||
assert(cleanup_thread.joinable());
|
||||
cleanup_thread.join();
|
||||
|
||||
if (dump_by_last_update_thread.joinable())
|
||||
dump_by_last_update_thread.join();
|
||||
|
||||
pool.wait();
|
||||
|
||||
std::lock_guard lock(currently_processing_mutex);
|
||||
@ -234,12 +230,18 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
|
||||
std::lock_guard data_lock(data_mutex);
|
||||
|
||||
if (!data)
|
||||
data = std::make_unique<InsertData>();
|
||||
{
|
||||
auto now = std::chrono::steady_clock::now();
|
||||
data = std::make_unique<InsertData>(now);
|
||||
|
||||
std::lock_guard lock(deadline_mutex);
|
||||
deadline_queue.insert({now + Milliseconds{it->first.settings.async_insert_busy_timeout_ms}, it});
|
||||
are_tasks_available.notify_one();
|
||||
}
|
||||
|
||||
size_t entry_data_size = entry->bytes.size();
|
||||
|
||||
data->size += entry_data_size;
|
||||
data->last_update = std::chrono::steady_clock::now();
|
||||
data->entries.emplace_back(entry);
|
||||
|
||||
{
|
||||
@ -250,7 +252,10 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
|
||||
LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
|
||||
data->entries.size(), data->size, queryToString(it->first.query));
|
||||
|
||||
if (data->size > max_data_size)
|
||||
/// Here we check whether we hit the limit on maximum data size in the buffer.
|
||||
/// And use setting from query context!
|
||||
/// It works, because queries with the same set of settings are already grouped together.
|
||||
if (data->size > it->first.settings.async_insert_max_data_size)
|
||||
scheduleDataProcessingJob(it->first, std::move(data), getContext());
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert);
|
||||
@ -282,43 +287,45 @@ void AsynchronousInsertQueue::waitForProcessingQuery(const String & query_id, co
|
||||
|
||||
void AsynchronousInsertQueue::busyCheck()
|
||||
{
|
||||
auto timeout = busy_timeout;
|
||||
|
||||
while (!waitForShutdown(timeout))
|
||||
while (!shutdown)
|
||||
{
|
||||
/// TODO: use priority queue instead of raw unsorted queue.
|
||||
timeout = busy_timeout;
|
||||
std::vector<QueueIterator> entries_to_flush;
|
||||
{
|
||||
std::unique_lock deadline_lock(deadline_mutex);
|
||||
are_tasks_available.wait_for(deadline_lock, Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [this]()
|
||||
{
|
||||
if (shutdown)
|
||||
return true;
|
||||
|
||||
if (!deadline_queue.empty() && deadline_queue.begin()->first < std::chrono::steady_clock::now())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
if (shutdown)
|
||||
return;
|
||||
|
||||
const auto now = std::chrono::steady_clock::now();
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (deadline_queue.empty() || deadline_queue.begin()->first > now)
|
||||
break;
|
||||
|
||||
entries_to_flush.emplace_back(deadline_queue.begin()->second);
|
||||
deadline_queue.erase(deadline_queue.begin());
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_lock read_lock(rwlock);
|
||||
|
||||
for (auto & [key, elem] : queue)
|
||||
for (auto & entry : entries_to_flush)
|
||||
{
|
||||
auto & [key, elem] = *entry;
|
||||
std::lock_guard data_lock(elem->mutex);
|
||||
if (!elem->data)
|
||||
continue;
|
||||
|
||||
auto lag = std::chrono::steady_clock::now() - elem->data->first_update;
|
||||
if (lag >= busy_timeout)
|
||||
scheduleDataProcessingJob(key, std::move(elem->data), getContext());
|
||||
else
|
||||
timeout = std::min(timeout, std::chrono::ceil<std::chrono::milliseconds>(busy_timeout - lag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AsynchronousInsertQueue::staleCheck()
|
||||
{
|
||||
while (!waitForShutdown(stale_timeout))
|
||||
{
|
||||
std::shared_lock read_lock(rwlock);
|
||||
|
||||
for (auto & [key, elem] : queue)
|
||||
{
|
||||
std::lock_guard data_lock(elem->mutex);
|
||||
if (!elem->data)
|
||||
continue;
|
||||
|
||||
auto lag = std::chrono::steady_clock::now() - elem->data->last_update;
|
||||
if (lag >= stale_timeout)
|
||||
scheduleDataProcessingJob(key, std::move(elem->data), getContext());
|
||||
}
|
||||
}
|
||||
@ -326,12 +333,16 @@ void AsynchronousInsertQueue::staleCheck()
|
||||
|
||||
void AsynchronousInsertQueue::cleanup()
|
||||
{
|
||||
/// Do not run cleanup too often,
|
||||
/// because it holds exclusive lock.
|
||||
auto timeout = busy_timeout * 5;
|
||||
|
||||
while (!waitForShutdown(timeout))
|
||||
while (true)
|
||||
{
|
||||
{
|
||||
std::unique_lock cleanup_lock(cleanup_mutex);
|
||||
cleanup_can_run.wait_for(cleanup_lock, Milliseconds(cleanup_timeout), [this]() -> bool { return shutdown; });
|
||||
|
||||
if (shutdown)
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<InsertQuery> keys_to_remove;
|
||||
|
||||
{
|
||||
@ -383,11 +394,6 @@ void AsynchronousInsertQueue::cleanup()
|
||||
}
|
||||
}
|
||||
|
||||
bool AsynchronousInsertQueue::waitForShutdown(const Milliseconds & timeout)
|
||||
{
|
||||
std::unique_lock shutdown_lock(shutdown_mutex);
|
||||
return shutdown_cv.wait_for(shutdown_lock, timeout, [this]() { return shutdown; });
|
||||
}
|
||||
|
||||
// static
|
||||
void AsynchronousInsertQueue::processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context)
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
@ -18,14 +19,7 @@ class AsynchronousInsertQueue : public WithContext
|
||||
public:
|
||||
using Milliseconds = std::chrono::milliseconds;
|
||||
|
||||
/// Using structure to allow and benefit from designated initialization and not mess with a positional arguments in ctor.
|
||||
struct Timeout
|
||||
{
|
||||
Milliseconds busy;
|
||||
Milliseconds stale;
|
||||
};
|
||||
|
||||
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size, const Timeout & timeouts);
|
||||
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout);
|
||||
~AsynchronousInsertQueue();
|
||||
|
||||
void push(ASTPtr query, ContextPtr query_context);
|
||||
@ -69,6 +63,10 @@ private:
|
||||
std::exception_ptr exception;
|
||||
};
|
||||
|
||||
explicit InsertData(std::chrono::steady_clock::time_point now)
|
||||
: first_update(now)
|
||||
{}
|
||||
|
||||
using EntryPtr = std::shared_ptr<Entry>;
|
||||
|
||||
std::list<EntryPtr> entries;
|
||||
@ -76,11 +74,7 @@ private:
|
||||
|
||||
/// Timestamp of the first insert into queue, or after the last queue dump.
|
||||
/// Used to detect for how long the queue is active, so we can dump it by timer.
|
||||
std::chrono::time_point<std::chrono::steady_clock> first_update = std::chrono::steady_clock::now();
|
||||
|
||||
/// Timestamp of the last insert into queue.
|
||||
/// Used to detect for how long the queue is stale, so we can dump it by another timer.
|
||||
std::chrono::time_point<std::chrono::steady_clock> last_update;
|
||||
std::chrono::time_point<std::chrono::steady_clock> first_update;
|
||||
};
|
||||
|
||||
using InsertDataPtr = std::unique_ptr<InsertData>;
|
||||
@ -96,10 +90,21 @@ private:
|
||||
|
||||
using Queue = std::unordered_map<InsertQuery, std::shared_ptr<Container>, InsertQuery::Hash>;
|
||||
using QueueIterator = Queue::iterator;
|
||||
/// Ordered container
|
||||
using DeadlineQueue = std::map<std::chrono::steady_clock::time_point, QueueIterator>;
|
||||
|
||||
|
||||
mutable std::shared_mutex rwlock;
|
||||
Queue queue;
|
||||
|
||||
/// This is needed only for using inside cleanup() function and correct signaling about shutdown
|
||||
mutable std::mutex cleanup_mutex;
|
||||
mutable std::condition_variable cleanup_can_run;
|
||||
|
||||
mutable std::mutex deadline_mutex;
|
||||
mutable std::condition_variable are_tasks_available;
|
||||
DeadlineQueue deadline_queue;
|
||||
|
||||
using QueryIdToEntry = std::unordered_map<String, InsertData::EntryPtr>;
|
||||
mutable std::mutex currently_processing_mutex;
|
||||
QueryIdToEntry currently_processing_queries;
|
||||
@ -109,25 +114,21 @@ private:
|
||||
/// grow for a long period of time and users will be able to select new data in deterministic manner.
|
||||
/// - stale_timeout: if queue is stale for too long, then we dump the data too, so that users will be able to select the last
|
||||
/// piece of inserted data.
|
||||
/// - max_data_size: if the maximum size of data is reached, then again we dump the data.
|
||||
///
|
||||
/// During processing incoming INSERT queries we can also check whether the maximum size of data in buffer is reached (async_insert_max_data_size setting)
|
||||
/// If so, then again we dump the data.
|
||||
|
||||
const size_t max_data_size; /// in bytes
|
||||
const Milliseconds busy_timeout;
|
||||
const Milliseconds stale_timeout;
|
||||
const Milliseconds cleanup_timeout;
|
||||
|
||||
std::mutex shutdown_mutex;
|
||||
std::condition_variable shutdown_cv;
|
||||
bool shutdown{false};
|
||||
std::atomic<bool> shutdown{false};
|
||||
|
||||
ThreadPool pool; /// dump the data only inside this pool.
|
||||
ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck()
|
||||
ThreadFromGlobalPool dump_by_last_update_thread; /// uses stale_timeout and staleCheck()
|
||||
ThreadFromGlobalPool cleanup_thread; /// uses busy_timeout and cleanup()
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue");
|
||||
|
||||
void busyCheck();
|
||||
void staleCheck();
|
||||
void cleanup();
|
||||
|
||||
/// Should be called with shared or exclusively locked 'rwlock'.
|
||||
|
@ -30,6 +30,7 @@ public:
|
||||
{
|
||||
auto res = std::make_shared<ASTExplainQuery>(*this);
|
||||
res->children.clear();
|
||||
if (!children.empty())
|
||||
res->children.push_back(children[0]->clone());
|
||||
cloneOutputOptions(*res);
|
||||
return res;
|
||||
|
@ -27,6 +27,11 @@
|
||||
#include <Parsers/ASTWindowDefinition.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
#include <Parsers/ASTColumnsMatcher.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
|
||||
|
||||
#include <Parsers/parseIdentifierOrStringLiteral.h>
|
||||
#include <Parsers/parseIntervalKind.h>
|
||||
@ -36,6 +41,7 @@
|
||||
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/ParserExplainQuery.h>
|
||||
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
@ -52,25 +58,84 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Build an AST with the following structure:
|
||||
*
|
||||
* ```
|
||||
* SelectWithUnionQuery (children 1)
|
||||
* ExpressionList (children 1)
|
||||
* SelectQuery (children 2)
|
||||
* ExpressionList (children 1)
|
||||
* Asterisk
|
||||
* TablesInSelectQuery (children 1)
|
||||
* TablesInSelectQueryElement (children 1)
|
||||
* TableExpression (children 1)
|
||||
* Function <...>
|
||||
* ```
|
||||
*/
|
||||
static ASTPtr buildSelectFromTableFunction(const std::shared_ptr<ASTFunction> & ast_function)
|
||||
{
|
||||
auto result_select_query = std::make_shared<ASTSelectWithUnionQuery>();
|
||||
|
||||
{
|
||||
auto select_ast = std::make_shared<ASTSelectQuery>();
|
||||
select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
|
||||
select_ast->select()->children.push_back(std::make_shared<ASTAsterisk>());
|
||||
|
||||
auto list_of_selects = std::make_shared<ASTExpressionList>();
|
||||
list_of_selects->children.push_back(select_ast);
|
||||
|
||||
result_select_query->children.push_back(std::move(list_of_selects));
|
||||
result_select_query->list_of_selects = result_select_query->children.back();
|
||||
|
||||
{
|
||||
auto tables = std::make_shared<ASTTablesInSelectQuery>();
|
||||
select_ast->setExpression(ASTSelectQuery::Expression::TABLES, tables);
|
||||
auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
|
||||
auto table_expr = std::make_shared<ASTTableExpression>();
|
||||
tables->children.push_back(tables_elem);
|
||||
tables_elem->table_expression = table_expr;
|
||||
tables_elem->children.push_back(table_expr);
|
||||
|
||||
table_expr->table_function = ast_function;
|
||||
table_expr->children.push_back(table_expr->table_function);
|
||||
}
|
||||
}
|
||||
|
||||
return result_select_query;
|
||||
}
|
||||
|
||||
bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ASTPtr select_node;
|
||||
ParserSelectWithUnionQuery select;
|
||||
ParserExplainQuery explain;
|
||||
|
||||
if (pos->type != TokenType::OpeningRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
if (!select.parse(pos, select_node, expected))
|
||||
ASTPtr result_node = nullptr;
|
||||
|
||||
if (ASTPtr select_node; select.parse(pos, select_node, expected))
|
||||
{
|
||||
result_node = std::move(select_node);
|
||||
}
|
||||
else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected))
|
||||
{
|
||||
/// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...)
|
||||
result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node));
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pos->type != TokenType::ClosingRoundBracket)
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
node = std::make_shared<ASTSubquery>();
|
||||
node->children.push_back(select_node);
|
||||
node->children.push_back(result_node);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -88,11 +88,20 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
{
|
||||
/// Nothing to parse
|
||||
}
|
||||
else if (select_only)
|
||||
{
|
||||
if (select_p.parse(pos, query, expected))
|
||||
explain_query->setExplainedQuery(std::move(query));
|
||||
else
|
||||
return false;
|
||||
}
|
||||
else if (select_p.parse(pos, query, expected) ||
|
||||
create_p.parse(pos, query, expected) ||
|
||||
insert_p.parse(pos, query, expected) ||
|
||||
system_p.parse(pos, query, expected))
|
||||
{
|
||||
explain_query->setExplainedQuery(std::move(query));
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
|
@ -11,6 +11,7 @@ class ParserExplainQuery : public IParserBase
|
||||
protected:
|
||||
const char * end;
|
||||
bool allow_settings_after_format_in_insert;
|
||||
bool select_only;
|
||||
|
||||
const char * getName() const override { return "EXPLAIN"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
@ -18,7 +19,13 @@ public:
|
||||
explicit ParserExplainQuery(const char* end_, bool allow_settings_after_format_in_insert_)
|
||||
: end(end_)
|
||||
, allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_)
|
||||
, select_only(false)
|
||||
{}
|
||||
|
||||
explicit ParserExplainQuery()
|
||||
: end(nullptr) , allow_settings_after_format_in_insert(false) , select_only(true)
|
||||
{}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1241,19 +1241,18 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
for (const auto & disk_ptr : disks)
|
||||
defined_disk_names.insert(disk_ptr->getName());
|
||||
|
||||
for (const auto & [_, disk_ptr] : getContext()->getDisksMap())
|
||||
for (const auto & [disk_name, disk_ptr] : getContext()->getDisksMap())
|
||||
{
|
||||
/// In composable cache with the underlying source disk there might the following structure:
|
||||
/// DiskObjectStorage(CachedObjectStorage(...(CachedObjectStored(ObjectStorage)...)))
|
||||
/// In configuration file each of these layers has a different name, but data path
|
||||
/// (getPath() result) is the same. We need to take it into account here.
|
||||
if (disk_ptr->supportsCache())
|
||||
{
|
||||
if (defined_disk_names.contains(disk_ptr->getName()))
|
||||
if (disk_ptr->supportsCache() && defined_disk_names.contains(disk_ptr->getName()))
|
||||
{
|
||||
auto caches = disk_ptr->getCacheLayersNames();
|
||||
disk_names_wrapped_in_cache.insert(caches.begin(), caches.end());
|
||||
}
|
||||
LOG_TEST(log, "Cache layers for cache disk `{}`, inner disk `{}`: {}",
|
||||
disk_name, disk_ptr->getName(), fmt::join(caches, ", "));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1272,8 +1271,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_DISK,
|
||||
"Part {} ({}) was found on disk {} which is not defined in the storage policy",
|
||||
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name));
|
||||
"Part {} ({}) was found on disk {} which is not defined in the storage policy (defined disks: {}, wrapped disks: {})",
|
||||
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name),
|
||||
fmt::join(defined_disk_names, ", "), fmt::join(disk_names_wrapped_in_cache, ", "));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -542,7 +542,10 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
{
|
||||
out.function = RPNElement::FUNCTION_HAS;
|
||||
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
|
||||
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
if (converted_field.isNull())
|
||||
return false;
|
||||
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
|
||||
}
|
||||
}
|
||||
@ -565,7 +568,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType()))
|
||||
return false;
|
||||
|
||||
mutable_column->insert(convertFieldToType(f, *actual_type, value_type.get()));
|
||||
auto converted = convertFieldToType(f, *actual_type);
|
||||
if (converted.isNull())
|
||||
return false;
|
||||
|
||||
mutable_column->insert(converted);
|
||||
}
|
||||
|
||||
column = std::move(mutable_column);
|
||||
@ -583,7 +590,10 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
|
||||
out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;
|
||||
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
|
||||
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
if (converted_field.isNull())
|
||||
return false;
|
||||
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
|
||||
}
|
||||
|
||||
@ -611,9 +621,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
|
||||
|
||||
out.function = RPNElement::FUNCTION_HAS;
|
||||
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
|
||||
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
|
||||
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
|
||||
if (converted_field.isNull())
|
||||
return false;
|
||||
|
||||
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr auto retry_period_ms = 10 * 1000;
|
||||
constexpr auto retry_period_ms = 1000;
|
||||
}
|
||||
|
||||
/// Used to check whether it's us who set node `is_active`, or not.
|
||||
|
@ -265,7 +265,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
|
||||
if (max_num_params == 0)
|
||||
msg += "no parameters";
|
||||
if (min_num_params == max_num_params)
|
||||
else if (min_num_params == max_num_params)
|
||||
msg += fmt::format("{} parameters: {}", min_num_params, needed_params);
|
||||
else
|
||||
msg += fmt::format("{} to {} parameters: {}", min_num_params, max_num_params, needed_params);
|
||||
|
@ -4180,6 +4180,7 @@ void StorageReplicatedMergeTree::startupImpl()
|
||||
/// And this is just a callback
|
||||
session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]()
|
||||
{
|
||||
LOG_TEST(log, "Received event for expired session. Waking up restarting thread");
|
||||
restarting_thread.start();
|
||||
});
|
||||
|
||||
|
@ -24,7 +24,6 @@ NamesAndTypesList StorageSystemAsynchronousInserts::getNamesAndTypes()
|
||||
{"table", std::make_shared<DataTypeString>()},
|
||||
{"format", std::make_shared<DataTypeString>()},
|
||||
{"first_update", std::make_shared<DataTypeDateTime64>(TIME_SCALE)},
|
||||
{"last_update", std::make_shared<DataTypeDateTime64>(TIME_SCALE)},
|
||||
{"total_bytes", std::make_shared<DataTypeUInt64>()},
|
||||
{"entries.query_id", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"entries.bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
@ -77,7 +76,6 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co
|
||||
|
||||
res_columns[i++]->insert(insert_query.format);
|
||||
res_columns[i++]->insert(time_in_microseconds(elem->data->first_update));
|
||||
res_columns[i++]->insert(time_in_microseconds(elem->data->last_update));
|
||||
res_columns[i++]->insert(elem->data->size);
|
||||
|
||||
Array arr_query_id;
|
||||
|
110
src/TableFunctions/TableFunctionExplain.cpp
Normal file
110
src/TableFunctions/TableFunctionExplain.cpp
Normal file
@ -0,0 +1,110 @@
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/StorageValues.h>
|
||||
#include <TableFunctions/ITableFunction.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <TableFunctions/TableFunctionExplain.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/)
|
||||
{
|
||||
const auto * function = ast_function->as<ASTFunction>();
|
||||
if (function && function->arguments && function->arguments->children.size() == 1)
|
||||
{
|
||||
const auto & query_arg = function->arguments->children[0];
|
||||
|
||||
if (!query_arg->as<ASTExplainQuery>())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Table function '{}' requires a explain query argument, got '{}'",
|
||||
getName(), queryToString(query_arg));
|
||||
|
||||
query = query_arg;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Table function '{}' cannot be called directly, use `SELECT * FROM (EXPLAIN ...)` syntax", getName());
|
||||
}
|
||||
}
|
||||
|
||||
ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context) const
|
||||
{
|
||||
Block sample_block = getInterpreter(context).getSampleBlock(query->as<ASTExplainQuery>()->getKind());
|
||||
ColumnsDescription columns_description;
|
||||
for (const auto & column : sample_block.getColumnsWithTypeAndName())
|
||||
columns_description.add(ColumnDescription(column.name, column.type));
|
||||
return columns_description;
|
||||
}
|
||||
|
||||
static Block executeMonoBlock(QueryPipeline & pipeline)
|
||||
{
|
||||
if (!pipeline.pulling())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected pulling pipeline");
|
||||
|
||||
PullingPipelineExecutor pulling_executor(pipeline);
|
||||
std::vector<Block> blocks;
|
||||
while (true)
|
||||
{
|
||||
Block block;
|
||||
if (pulling_executor.pull(block))
|
||||
blocks.push_back(std::move(block));
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if (blocks.size() == 1)
|
||||
return blocks[0];
|
||||
|
||||
return concatenateBlocks(blocks);
|
||||
}
|
||||
|
||||
StoragePtr TableFunctionExplain::executeImpl(
|
||||
const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
|
||||
{
|
||||
BlockIO blockio = getInterpreter(context).execute();
|
||||
Block block = executeMonoBlock(blockio.pipeline);
|
||||
|
||||
StorageID storage_id(getDatabaseName(), table_name);
|
||||
auto storage = std::make_shared<StorageValues>(storage_id, getActualTableStructure(context), std::move(block));
|
||||
storage->startup();
|
||||
return storage;
|
||||
}
|
||||
|
||||
InterpreterExplainQuery TableFunctionExplain::getInterpreter(ContextPtr context) const
|
||||
{
|
||||
if (!query)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' requires a explain query argument", getName());
|
||||
|
||||
return InterpreterExplainQuery(query, context);
|
||||
}
|
||||
|
||||
void registerTableFunctionExplain(TableFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<TableFunctionExplain>({R"(
|
||||
Returns result of EXPLAIN query.
|
||||
|
||||
The function should not be called directly but can be invoked via `SELECT * FROM (EXPLAIN <query>)`.
|
||||
|
||||
You can use this query to process the result of EXPLAIN further using SQL (e.g., in tests).
|
||||
|
||||
Example:
|
||||
[example:1]
|
||||
|
||||
)",
|
||||
{{"1", "SELECT explain FROM (EXPLAIN AST SELECT * FROM system.numbers) WHERE explain LIKE '%Asterisk%'"}}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
31
src/TableFunctions/TableFunctionExplain.h
Normal file
31
src/TableFunctions/TableFunctionExplain.h
Normal file
@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <TableFunctions/ITableFunction.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
#include <Interpreters/InterpreterExplainQuery.h>
|
||||
#include <base/types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class TableFunctionExplain : public ITableFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "viewExplain";
|
||||
std::string getName() const override { return name; }
|
||||
|
||||
private:
|
||||
StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override;
|
||||
const char * getStorageTypeName() const override { return "Explain"; }
|
||||
|
||||
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
|
||||
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
|
||||
|
||||
InterpreterExplainQuery getInterpreter(ContextPtr context) const;
|
||||
|
||||
ASTPtr query = nullptr;
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -59,6 +59,7 @@ void registerTableFunctions()
|
||||
registerTableFunctionDictionary(factory);
|
||||
|
||||
registerTableFunctionFormat(factory);
|
||||
registerTableFunctionExplain(factory);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -58,6 +58,8 @@ void registerTableFunctionDictionary(TableFunctionFactory & factory);
|
||||
|
||||
void registerTableFunctionFormat(TableFunctionFactory & factory);
|
||||
|
||||
void registerTableFunctionExplain(TableFunctionFactory & factory);
|
||||
|
||||
void registerTableFunctions();
|
||||
|
||||
}
|
||||
|
@ -106,3 +106,6 @@ endif()
|
||||
if (TARGET ch_contrib::jemalloc)
|
||||
set(USE_JEMALLOC 1)
|
||||
endif()
|
||||
if (NOT ENABLE_EXTERNAL_OPENSSL)
|
||||
set(USE_BORINGSSL 1)
|
||||
endif ()
|
||||
|
@ -10,7 +10,7 @@
|
||||
<!-- default credentials for Azurite storage account -->
|
||||
<account_name>devstoreaccount1</account_name>
|
||||
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
|
||||
<max_single_part_upload_size>33554432</max_single_part_upload_size>
|
||||
<max_single_part_upload_size>100000</max_single_part_upload_size>
|
||||
</blob_storage_disk>
|
||||
<hdd>
|
||||
<type>local</type>
|
||||
|
@ -4,10 +4,9 @@ import os
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.skip
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.utility import generate_values, replace_config, SafeThread
|
||||
from azure.storage.blob import BlobServiceClient
|
||||
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
@ -573,8 +572,42 @@ def test_restart_during_load(cluster):
|
||||
def test_big_insert(cluster):
|
||||
node = cluster.instances[NODE_NAME]
|
||||
create_table(node, TABLE_NAME)
|
||||
|
||||
check_query = "SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)"
|
||||
|
||||
azure_query(
|
||||
node,
|
||||
f"INSERT INTO {TABLE_NAME} select '2020-01-03', number, toString(number) from numbers(5000000)",
|
||||
f"INSERT INTO {TABLE_NAME} {check_query}",
|
||||
)
|
||||
assert int(azure_query(node, f"SELECT count() FROM {TABLE_NAME}")) == 5000000
|
||||
assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query(
|
||||
check_query
|
||||
)
|
||||
|
||||
blob_container_client = cluster.blob_service_client.get_container_client(
|
||||
CONTAINER_NAME
|
||||
)
|
||||
|
||||
blobs = blob_container_client.list_blobs()
|
||||
max_single_part_upload_size = 100000
|
||||
checked = False
|
||||
|
||||
for blob in blobs:
|
||||
blob_client = cluster.blob_service_client.get_blob_client(
|
||||
CONTAINER_NAME, blob.name
|
||||
)
|
||||
committed, uncommited = blob_client.get_block_list()
|
||||
|
||||
blocks = committed
|
||||
last_id = len(blocks)
|
||||
id = 1
|
||||
if len(blocks) > 1:
|
||||
checked = True
|
||||
|
||||
for block in blocks:
|
||||
print(f"blob: {blob.name}, block size: {block.size}")
|
||||
if id == last_id:
|
||||
assert max_single_part_upload_size >= block.size
|
||||
else:
|
||||
assert max_single_part_upload_size == block.size
|
||||
id += 1
|
||||
assert checked
|
||||
|
@ -51,7 +51,7 @@ select * from enums order by e;
|
||||
select * from enums order by e desc;
|
||||
|
||||
-- GROUP BY
|
||||
select count(), e from enums group by e;
|
||||
select count(), e from enums group by e order by e;
|
||||
select any(e) from enums;
|
||||
|
||||
-- IN
|
||||
|
@ -11,4 +11,4 @@ ${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(1) UNION ALL SELECT * FROM nu
|
||||
echo 'extremes'
|
||||
${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(3)" --format PrettyCompactMonoBlock --extremes=1
|
||||
echo 'totals'
|
||||
${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS" --format PrettyCompactMonoBlock
|
||||
${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS ORDER BY number%2" --format PrettyCompactMonoBlock
|
||||
|
@ -9,4 +9,5 @@ SELECT
|
||||
number IN (1, 2) AS x,
|
||||
count()
|
||||
FROM numbers(10)
|
||||
GROUP BY x;
|
||||
GROUP BY x
|
||||
ORDER BY x;
|
||||
|
@ -66,10 +66,10 @@ drop table if exists lc_null_fix_str_1;
|
||||
|
||||
select '-';
|
||||
SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one;
|
||||
select toLowCardinality('a') as val group by val;
|
||||
select (toLowCardinality('a') as val) || 'b' group by val;
|
||||
select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val;
|
||||
select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val;
|
||||
select toLowCardinality('a') as val group by val order by val;
|
||||
select (toLowCardinality('a') as val) || 'b' group by val order by val;
|
||||
select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val order by val;
|
||||
select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val order by val;
|
||||
|
||||
select '-';
|
||||
drop table if exists lc_str_uuid;
|
||||
|
@ -20,13 +20,14 @@ INSERT INTO bitmap_state_test SELECT
|
||||
city_id,
|
||||
groupBitmapState(uid) AS uv
|
||||
FROM bitmap_test
|
||||
GROUP BY pickup_date, city_id;
|
||||
GROUP BY pickup_date, city_id
|
||||
ORDER BY pickup_date, city_id;
|
||||
|
||||
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
|
||||
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date;
|
||||
|
||||
SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
|
||||
|
||||
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date;
|
||||
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date order by pickup_date;
|
||||
|
||||
SELECT
|
||||
bitmapCardinality(day_today) AS today_users,
|
||||
@ -37,11 +38,11 @@ SELECT
|
||||
bitmapXorCardinality(day_today, day_before) AS diff_users
|
||||
FROM
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
|
||||
) js1
|
||||
ALL LEFT JOIN
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
|
||||
) js2
|
||||
USING city_id;
|
||||
|
||||
@ -54,11 +55,11 @@ SELECT
|
||||
bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users
|
||||
FROM
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
|
||||
) js1
|
||||
ALL LEFT JOIN
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
|
||||
) js2
|
||||
USING city_id;
|
||||
|
||||
@ -68,7 +69,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapHasAny(bitmapBuild([uid]), (SELECT
|
||||
|
||||
SELECT count(*) FROM bitmap_test WHERE 0 = bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid]));
|
||||
|
||||
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id;
|
||||
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id;
|
||||
|
||||
DROP TABLE bitmap_state_test;
|
||||
DROP TABLE bitmap_test;
|
||||
|
@ -20,7 +20,7 @@ INSERT INTO bitmap_test SELECT '2019-01-03', 2, number FROM numbers(1,10);
|
||||
|
||||
SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
|
||||
|
||||
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date;
|
||||
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date ORDER BY pickup_date;
|
||||
|
||||
SELECT
|
||||
bitmapCardinality(day_today) AS today_users,
|
||||
@ -31,11 +31,11 @@ SELECT
|
||||
bitmapXorCardinality(day_today, day_before) AS diff_users
|
||||
FROM
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
|
||||
) js1
|
||||
ALL LEFT JOIN
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
|
||||
) js2
|
||||
USING city_id;
|
||||
|
||||
@ -48,11 +48,11 @@ SELECT
|
||||
bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users
|
||||
FROM
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
|
||||
) js1
|
||||
ALL LEFT JOIN
|
||||
(
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
|
||||
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
|
||||
) js2
|
||||
USING city_id;
|
||||
|
||||
@ -67,7 +67,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapContains((SELECT groupBitmapState(u
|
||||
SELECT count(*) FROM bitmap_test WHERE 0 = bitmapContains((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), uid);
|
||||
|
||||
-- PR#8082
|
||||
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id;
|
||||
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id;
|
||||
|
||||
-- bitmap state test
|
||||
DROP TABLE IF EXISTS bitmap_state_test;
|
||||
@ -87,7 +87,7 @@ INSERT INTO bitmap_state_test SELECT
|
||||
FROM bitmap_test
|
||||
GROUP BY pickup_date, city_id;
|
||||
|
||||
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
|
||||
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date;
|
||||
|
||||
-- between column and expression test
|
||||
DROP TABLE IF EXISTS bitmap_column_expr_test;
|
||||
|
@ -1,4 +1,4 @@
|
||||
select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;
|
||||
select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k;
|
||||
|
||||
-- different seed
|
||||
select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;
|
||||
select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k;
|
||||
|
@ -3,17 +3,17 @@ CREATE TABLE t (item_id UInt64, price_sold Float32, date Date) ENGINE MergeTree
|
||||
|
||||
SELECT item_id
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
|
||||
FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r
|
||||
FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r
|
||||
USING (item_id);
|
||||
|
||||
SELECT id
|
||||
FROM (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) l
|
||||
FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) r
|
||||
FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS ORDER BY item_id) r
|
||||
USING (id);
|
||||
|
||||
SELECT item_id
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
|
||||
INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r
|
||||
INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r
|
||||
USING (item_id);
|
||||
|
||||
SELECT id
|
||||
@ -26,75 +26,77 @@ FROM (
|
||||
SELECT item_id AS id, SUM(price_sold) AS recent
|
||||
FROM t WHERE (date BETWEEN '2019-12-16' AND '2020-03-08')
|
||||
GROUP BY id WITH TOTALS
|
||||
ORDER BY id
|
||||
) ll
|
||||
FULL JOIN
|
||||
(
|
||||
SELECT item_id AS id, SUM(price_sold) AS yago
|
||||
FROM t WHERE (date BETWEEN '2018-12-17' AND '2019-03-10')
|
||||
GROUP BY id WITH TOTALS
|
||||
ORDER BY id
|
||||
) rr
|
||||
USING (id);
|
||||
|
||||
SELECT id, yago
|
||||
FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
|
||||
USING (id);
|
||||
|
||||
SELECT id, yago
|
||||
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll
|
||||
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr
|
||||
USING (id);
|
||||
|
||||
SELECT id, yago
|
||||
FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ) AS ll
|
||||
FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr
|
||||
USING (id);
|
||||
|
||||
SELECT id, yago
|
||||
FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
|
||||
USING (id);
|
||||
|
||||
SELECT id, yago
|
||||
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
|
||||
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
|
||||
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
|
||||
USING (id);
|
||||
|
||||
INSERT INTO t VALUES (1, 100, '1970-01-01'), (1, 200, '1970-01-02');
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
|
||||
LEFT JOIN (SELECT item_id FROM t ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
|
||||
RIGHT JOIN (SELECT item_id FROM t ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id FROM t) l
|
||||
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
|
||||
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id FROM t) l
|
||||
RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
|
||||
RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
|
||||
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
|
||||
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
|
||||
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS) l
|
||||
LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ) r
|
||||
FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
|
||||
LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
SELECT *
|
||||
FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS) l
|
||||
LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ) r
|
||||
FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date) l
|
||||
LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date ) r
|
||||
ON l.item_id = r.item_id;
|
||||
|
||||
DROP TABLE t;
|
||||
|
@ -5,7 +5,7 @@ SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numb
|
||||
SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM numbers(1000));
|
||||
SELECT round(corrStable(x, y), 5) FROM (SELECT DISTINCT number % 10 AS x, number % 5 AS y FROM numbers(1000));
|
||||
|
||||
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
|
||||
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x ORDER BY x;
|
||||
|
||||
SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
EXPLAIN SYNTAX SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
|
||||
|
@ -87,3 +87,7 @@ aes-256-gcm 1
|
||||
aes-256-gcm 1
|
||||
aes-256-gcm 1
|
||||
F56E87055BC32D0EEB31B2EACC2BF2A5 1
|
||||
2022-09-02 00:00:00 2
|
||||
2022-08-02 00:00:00 1 \N
|
||||
2022-09-02 00:00:00 2 value2
|
||||
2022-09-02 00:00:01 3 \N
|
||||
|
@ -129,4 +129,18 @@ SELECT
|
||||
hex(decrypt('aes-256-gcm', concat(ciphertext, tag), key, iv, aad)) as plaintext_actual,
|
||||
plaintext_actual = hex(plaintext);
|
||||
|
||||
-- tryDecrypt
|
||||
CREATE TABLE decrypt_null (
|
||||
dt DateTime,
|
||||
user_id UInt32,
|
||||
encrypted String,
|
||||
iv String
|
||||
) ENGINE = Memory;
|
||||
|
||||
INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
|
||||
|
||||
SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454}
|
||||
SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2');
|
||||
SELECT dt, user_id, (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id;
|
||||
|
||||
DROP TABLE encryption_test;
|
||||
|
@ -2,7 +2,7 @@ select sumResample(0, 20, 1)(number, number % 20) from numbers(200);
|
||||
select arrayMap(x -> finalizeAggregation(x), state) from (select sumStateResample(0, 20, 1)(number, number % 20) as state from numbers(200));
|
||||
select arrayMap(x -> finalizeAggregation(x), state) from
|
||||
(
|
||||
select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3
|
||||
select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 order by number % 3
|
||||
);
|
||||
|
||||
select groupArrayResample(0, 20, 1)(number, number % 20) from numbers(50);
|
||||
|
@ -1 +1 @@
|
||||
SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k;
|
||||
SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k ORDER BY k;
|
||||
|
@ -1,9 +1,9 @@
|
||||
set output_format_write_statistics = 0;
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format Pretty;
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format Pretty;
|
||||
select '--';
|
||||
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format TSV;
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format TSV;
|
||||
select '--';
|
||||
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format JSON;
|
||||
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format JSON;
|
||||
select '--';
|
||||
|
@ -28,7 +28,7 @@ SELECT name, active FROM system.parts WHERE database = currentDatabase() AND tab
|
||||
SELECT '# optimize';
|
||||
SYSTEM START MERGES data_01660;
|
||||
OPTIMIZE TABLE data_01660 FINAL;
|
||||
SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state;
|
||||
SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state ORDER BY _state;
|
||||
|
||||
-- TRUNCATE does not remove parts instantly
|
||||
SELECT '# truncate';
|
||||
|
@ -10,11 +10,11 @@ SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul'));
|
||||
2020-01-01 00:00:00
|
||||
SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null;
|
||||
SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul');
|
||||
1970-01-01 02:00:00.00
|
||||
1970-01-01 01:59:58.00
|
||||
SELECT toDateTime64(-2., 2, 'Asia/Istanbul');
|
||||
1970-01-01 02:00:00.00
|
||||
1970-01-01 01:59:58.00
|
||||
SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul');
|
||||
2106-02-07 09:28:16.00
|
||||
2242-03-16 15:56:32.00
|
||||
SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null;
|
||||
-- These are outsize of extended range and hence clamped
|
||||
SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul');
|
||||
|
@ -1,3 +1,13 @@
|
||||
2005-03-18 03:58:31.222
|
||||
2005-03-18 03:58:31.222
|
||||
2005-03-18 03:58:31.222
|
||||
1970-01-01 00:00:00.000000000
|
||||
1970-01-01 00:00:00.000000000
|
||||
1900-04-15 00:53:20.000000000
|
||||
1900-04-15 00:53:20.000000000
|
||||
1900-01-01 00:00:00.000000000
|
||||
1900-01-01 00:00:00.000000000
|
||||
1900-01-01 00:00:00.000000000
|
||||
1900-01-01 00:00:00.000000000
|
||||
2261-07-15 11:33:20.000000000
|
||||
2261-07-15 11:33:20.000000000
|
||||
|
@ -1,3 +1,22 @@
|
||||
SELECT CAST(1111111111.222 AS DateTime64(3, 'Asia/Istanbul'));
|
||||
SELECT toDateTime(1111111111.222, 3, 'Asia/Istanbul');
|
||||
SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul');
|
||||
|
||||
SELECT toDateTime64(0.0, 9, 'UTC') ;
|
||||
SELECT toDateTime64(0, 9, 'UTC');
|
||||
|
||||
SELECT toDateTime64(-2200000000.0, 9, 'UTC'); -- 1900-01-01 < value
|
||||
SELECT toDateTime64(-2200000000, 9, 'UTC');
|
||||
|
||||
SELECT toDateTime64(-2300000000.0, 9, 'UTC'); -- value < 1900-01-01
|
||||
SELECT toDateTime64(-2300000000, 9, 'UTC');
|
||||
|
||||
SELECT toDateTime64(-999999999999.0, 9, 'UTC'); -- value << 1900-01-01
|
||||
SELECT toDateTime64(-999999999999, 9, 'UTC');
|
||||
|
||||
SELECT toDateTime64(9200000000.0, 9, 'UTC'); -- value < 2262-04-11
|
||||
SELECT toDateTime64(9200000000, 9, 'UTC');
|
||||
|
||||
SELECT toDateTime64(9300000000.0, 9, 'UTC'); -- { serverError 407 } # 2262-04-11 < value
|
||||
SELECT toDateTime64(9300000000, 9, 'UTC'); -- { serverError 407 }
|
||||
|
||||
|
@ -124,14 +124,14 @@ SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id;
|
||||
192 0 [] [0,1]
|
||||
SELECT sum(u) FROM t_sparse;
|
||||
1900
|
||||
SELECT sum(u) FROM t_sparse GROUP BY id % 7;
|
||||
210
|
||||
360
|
||||
300
|
||||
240
|
||||
190
|
||||
330
|
||||
270
|
||||
SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7;
|
||||
0 210
|
||||
1 360
|
||||
2 300
|
||||
3 240
|
||||
4 190
|
||||
5 330
|
||||
6 270
|
||||
SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5;
|
||||
[1]
|
||||
[1,3]
|
||||
|
@ -25,7 +25,7 @@ SELECT * FROM t_sparse WHERE arr1 != [] ORDER BY id;
|
||||
SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id;
|
||||
|
||||
SELECT sum(u) FROM t_sparse;
|
||||
SELECT sum(u) FROM t_sparse GROUP BY id % 7;
|
||||
SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7;
|
||||
|
||||
SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5;
|
||||
|
||||
|
@ -95,4 +95,5 @@ FROM
|
||||
FROM numbers(10)
|
||||
)
|
||||
GROUP BY b
|
||||
ORDER BY b ASC
|
||||
1 1 1
|
||||
|
@ -58,8 +58,8 @@ SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1),
|
||||
|
||||
EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d);
|
||||
|
||||
SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b;
|
||||
EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b;
|
||||
SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b;
|
||||
EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b;
|
||||
|
||||
-- fuzzer
|
||||
SELECT quantileDeterministic(0.99)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
|
@ -12,7 +12,6 @@ CREATE TABLE system.asynchronous_inserts
|
||||
`table` String,
|
||||
`format` String,
|
||||
`first_update` DateTime64(6),
|
||||
`last_update` DateTime64(6),
|
||||
`total_bytes` UInt64,
|
||||
`entries.query_id` Array(String),
|
||||
`entries.bytes` Array(UInt64),
|
||||
|
@ -0,0 +1 @@
|
||||
-1
|
@ -0,0 +1 @@
|
||||
SELECT round(rankCorr(number, -number)) FROM numbers(5000000);
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: race, zookeeper, no-parallel
|
||||
# Tags: race, zookeeper, no-parallel, disabled
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user