Merge branch 'master' into new-http-compression-methods

Alexey Milovidov 2022-10-05 23:07:01 +03:00 committed by GitHub
commit aa9e8f1cde
106 changed files with 1934 additions and 1067 deletions

.gitattributes

@@ -1,3 +1,4 @@
contrib/* linguist-vendored
*.h linguist-language=C++
tests/queries/0_stateless/data_json/* binary
tests/queries/0_stateless/*.reference -crlf


@@ -495,6 +495,14 @@ endif ()
enable_testing() # Enable for tests without binary
option(ENABLE_EXTERNAL_OPENSSL "This option is insecure and not recommended in any circumstances. If it is enabled, it allows building with an alternative OpenSSL library. By default, ClickHouse uses BoringSSL, which is better. Do not use this option." OFF)
if (ENABLE_EXTERNAL_OPENSSL)
message (STATUS "Build and uses OpenSSL library instead of BoringSSL. This is strongly discouraged. Your build of ClickHouse will be unsupported.")
set(ENABLE_SSL 1)
target_compile_options(global-group INTERFACE "-Wno-deprecated-declarations")
endif ()
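# For illustration, a hypothetical configure invocation that opts into this
# discouraged mode passes the flag at CMake time:
#   cmake -DENABLE_EXTERNAL_OPENSSL=ON ..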
# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
set (CLICKHOUSE_ETC_DIR "/etc")


@@ -5,6 +5,7 @@ ClickHouse® is an open-source column-oriented database management system that a
## Useful Links
* [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page.
* [ClickHouse Cloud](https://clickhouse.com/cloud) ClickHouse as a service, built by the creators and maintainers.
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.

contrib/AMQP-CPP

@@ -1 +1 @@
Subproject commit 1a6c51f4ac51ac56610fa95081bd2f349911375a
Subproject commit 818c2d8ad96a08a5d20fece7d1e1e8855a2b0860


@@ -74,7 +74,11 @@ add_contrib (re2-cmake re2)
add_contrib (xz-cmake xz)
add_contrib (brotli-cmake brotli)
add_contrib (double-conversion-cmake double-conversion)
add_contrib (boringssl-cmake boringssl)
if (NOT ENABLE_EXTERNAL_OPENSSL)
add_contrib (boringssl-cmake boringssl)
else ()
add_contrib (openssl-cmake openssl)
endif ()
add_contrib (poco-cmake poco)
add_contrib (croaring-cmake croaring)
add_contrib (zstd-cmake zstd)


@@ -4,6 +4,11 @@ if (NOT ENABLE_AMQPCPP)
message(STATUS "Not using AMQP-CPP")
return()
endif()
if (OS_FREEBSD)
message(STATUS "Not using AMQP-CPP because libuv is disabled")
return()
endif()
# can be removed once libuv can be built on MacOS with GCC
if (NOT TARGET ch_contrib::uv)


@@ -578,6 +578,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c")
endif()
if (ENABLE_EXTERNAL_OPENSSL)
list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c")
list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c")
endif ()
target_sources(_krb5 PRIVATE
${ALL_SRCS}
)


@@ -59,6 +59,12 @@ set(SRCS
add_library(_libpq ${SRCS})
if (ENABLE_EXTERNAL_OPENSSL)
add_definitions(-DHAVE_BIO_METH_NEW)
add_definitions(-DHAVE_HMAC_CTX_NEW)
add_definitions(-DHAVE_HMAC_CTX_FREE)
endif ()
target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")

@@ -1 +1 @@
Subproject commit dc972a767ff2e9488d96cb2a6e67de160fbe15a7
Subproject commit 328e4602120ddd6b2c1fb91bf2d50bd7bc249711


@@ -0,0 +1,6 @@
<clickhouse>
<!-- Allow nullable key to avoid errors while fuzzing definitions of tables -->
<merge_tree>
<allow_nullable_key>1</allow_nullable_key>
</merge_tree>
</clickhouse>


@@ -94,6 +94,7 @@ function configure
# TODO figure out which ones are needed
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
cp -av --dereference "$script_dir"/allow-nullable-key.xml db/config.d
cat > db/config.d/core.xml <<EOL
<clickhouse>
@@ -240,6 +241,7 @@ quit
--receive_data_timeout_ms=10000 \
--stacktrace \
--query-fuzzer-runs=1000 \
--create-query-fuzzer-runs=50 \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \


@@ -1,8 +1,7 @@
position: 10
position: 1
label: 'Example Datasets'
collapsible: true
collapsed: true
link:
type: generated-index
title: Example Datasets
slug: /en/getting-started/example-datasets
type: doc
id: en/getting-started/example-datasets/


@@ -1,9 +1,16 @@
---
slug: /en/getting-started/example-datasets/cell-towers
sidebar_label: Cell Towers
sidebar_position: 3
title: "Cell Towers"
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
This dataset is from [OpenCellid](https://www.opencellid.org/) - The world's largest Open Database of Cell Towers.
As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world with their geographical coordinates and metadata (country code, network, etc).
@@ -13,6 +20,26 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4
## Get the Dataset {#get-the-dataset}
<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>
ClickHouse Cloud provides an easy-button for uploading this dataset from S3. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
<ActionsMenu menu="Load Data" />
Choose the **Cell Towers** dataset from the **Sample data** tab, and **Load data**:
![Load cell towers dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
Examine the schema of the cell_towers table:
```sql
DESCRIBE TABLE cell_towers
```
<SQLConsoleDetail />
</TabItem>
<TabItem value="selfmanaged" label="Self-managed">
1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).
2. Validate the integrity (optional step):
@@ -56,7 +83,10 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv
```
## Examples {#examples}
</TabItem>
</Tabs>
## Example queries {#examples}
1. A number of cell towers by type:
@@ -101,18 +131,31 @@ So, the top countries are: the USA, Germany, and Russia.
You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
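A minimal sketch of such a dictionary, assuming the code-to-name pairs have been loaded into a hypothetical `mcc_codes` table (`mcc UInt64, country String`):
```sql
-- Hypothetical dictionary mapping a mobile country code (mcc) to a country name.
-- The source table mcc_codes is an assumption; populate it from any MCC reference list.
CREATE DICTIONARY mcc_names
(
    mcc UInt64,
    country String
)
PRIMARY KEY mcc
SOURCE(CLICKHOUSE(TABLE 'mcc_codes'))
LAYOUT(HASHED())
LIFETIME(MIN 0 MAX 3600);
```
You could then decode the codes inline with `dictGet('mcc_names', 'country', toUInt64(mcc))`.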
## Use case {#use-case}
## Use case: Incorporate geo data {#use-case}
Using the `pointInPolygon` function.
1. Create a table where we will store polygons:
<Tabs groupId="deployMethod">
<TabItem value="serverless" label="ClickHouse Cloud" default>
```sql
CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64)))
ORDER BY polygon;
```
</TabItem>
<TabItem value="selfmanaged" label="Self-managed">
```sql
CREATE TEMPORARY TABLE
moscow (polygon Array(Tuple(Float64, Float64)));
```
</TabItem>
</Tabs>
2. This is a rough shape of Moscow (without "new Moscow"):
```sql

File diff suppressed because one or more lines are too long


@@ -13,16 +13,6 @@ Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Download the Dataset {#download-dataset}
Run the command:
```bash
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
Download will take about 2 minutes with good internet connection.
## Create the Table {#create-table}
```sql
@@ -41,31 +31,49 @@ CREATE TABLE uk_price_paid
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Preprocess and Import Data {#preprocess-import-data}
## Preprocess and Insert the Data {#preprocess-import-data}
We will use the `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
We will use the `url` function to stream the data into ClickHouse. We need to preprocess some of the incoming data first, which includes:
- splitting the `postcode` into two different columns - `postcode1` and `postcode2`, which is better for storage and queries
- converting the `time` field to a date, as it only contains 00:00 time
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis
- transforming `type` and `duration` to more readable `Enum` fields using the [transform](../../sql-reference/functions/other-functions.md#transform) function (see the short example after this list)
- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 or 1
- dropping the last two columns since they all have the same value (which is 0)
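To see what `transform` does in isolation, here is a minimal standalone query (an illustration only, not part of the import pipeline):
```sql
-- Maps the single-letter code 'T' to the readable label 'terraced';
-- values missing from the first array are returned unchanged.
SELECT transform('T',
    ['T', 'S', 'D', 'F', 'O'],
    ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type;
```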
In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
The `url` function streams the data from the web server into your ClickHouse table. The following command inserts 5 million rows into the `uk_price_paid` table:
The preprocessing is:
- splitting the postcode into two different columns `postcode1` and `postcode2`, which is better for storage and queries;
- converting the `time` field to a date, as it only contains 00:00 time;
- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
- transforming `type` and `duration` to more readable Enum fields with the function [transform](../../sql-reference/functions/other-functions.md#transform);
- transforming the `is_new` and `category` fields from single-character strings (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) fields with 0 and 1.
Preprocessed data is piped directly to `clickhouse-client` to be inserted into the ClickHouse table in a streaming fashion.
```bash
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
time DateTime,
```sql
INSERT INTO uk_price_paid
WITH
splitByChar(' ', postcode) AS p
SELECT
toUInt32(price_string) AS price,
parseDateTimeBestEffortUS(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county
FROM url(
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
'CSV',
'uuid_string String,
price_string String,
time String,
postcode String,
a String,
b String,
@@ -78,154 +86,136 @@ clickhouse-local --input-format CSV --structure '
district String,
county String,
d String,
e String
' --query "
WITH splitByChar(' ', postcode) AS p
SELECT
price,
toDate(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
e String'
) SETTINGS max_http_get_redirects=10;
```
It will take about 40 seconds.
Wait for the data to insert - it will take a minute or two depending on the network speed.
## Validate the Data {#validate-data}
Query:
Let's verify it worked by seeing how many rows were inserted:
```sql
SELECT count() FROM uk_price_paid;
SELECT count()
FROM uk_price_paid
```
Result:
```text
┌──count()─┐
│ 26321785 │
└──────────┘
```
The size of the dataset in ClickHouse is just 278 MiB; let's check it.
Query:
At the time this query was executed, the dataset had 27,450,499 rows. Let's see what the storage size is of the table in ClickHouse:
```sql
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
SELECT formatReadableSize(total_bytes)
FROM system.tables
WHERE name = 'uk_price_paid'
```
Result:
```text
┌─formatReadableSize(total_bytes)─┐
│ 278.80 MiB │
└─────────────────────────────────┘
```
Notice the size of the table is just 221.43 MiB!
## Run Some Queries {#run-queries}
Let's run some queries to analyze the data:
### Query 1. Average Price Per Year {#average-price}
Query:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
    bar(price, 0, 1000000, 80)
FROM uk_price_paid
GROUP BY year
ORDER BY year
```
Result:
The result looks like:
```text
```response
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 1995 │ 67934 │ █████▍ │
│ 1996 │ 71508 │ █████▋ │
│ 1997 │ 78536 │ ██████▎ │
│ 1998 │ 85441 │ ██████▋ │
│ 1999 │ 96038 │ ███████▋ │
│ 2000 │ 107487 │ ████████▌ │
│ 2001 │ 118888 │ █████████▌ │
│ 2002 │ 137948 │ ███████████ │
│ 2003 │ 155893 │ ████████████▍ │
│ 2004 │ 178888 │ ██████████████▎ │
│ 2005 │ 189359 │ ███████████████▏ │
│ 2006 │ 203532 │ ████████████████▎ │
│ 2007 │ 219375 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊ │
│ 2012 │ 238381 │ ███████████████████ │
│ 2013 │ 256927 │ ████████████████████▌ │
│ 2014 │ 280008 │ ██████████████████████▍ │
│ 2015 │ 297263 │ ███████████████████████▋ │
│ 2016 │ 313518 │ █████████████████████████ │
│ 2017 │ 346371 │ ███████████████████████████▋ │
│ 2018 │ 350556 │ ████████████████████████████ │
│ 2019 │ 352184 │ ████████████████████████████▏ │
│ 2020 │ 375808 │ ██████████████████████████████ │
│ 2021 │ 381105 │ ██████████████████████████████▍ │
│ 2022 │ 362572 │ █████████████████████████████ │
└──────┴────────┴────────────────────────────────────────┘
```
### Query 2. Average Price per Year in London {#average-price-london}
Query:
```sql
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
    bar(price, 0, 2000000, 100)
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year
```
Result:
The result looks like:
```text
```response
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 1995 │ 109110 │ █████▍ │
│ 1996 │ 118659 │ █████▊ │
│ 1997 │ 136526 │ ██████▋ │
│ 1998 │ 153002 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215849 │ ██████████▋ │
│ 2001 │ 232987 │ ███████████▋ │
│ 2002 │ 263668 │ █████████████▏ │
│ 2003 │ 278424 │ █████████████▊ │
│ 2004 │ 304664 │ ███████████████▏ │
│ 2005 │ 322887 │ ████████████████▏ │
│ 2006 │ 356195 │ █████████████████▋ │
│ 2007 │ 404062 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2010 │ 480322 │ ████████████████████████ │
│ 2011 │ 496278 │ ████████████████████████▋ │
│ 2012 │ 519482 │ █████████████████████████▊ │
│ 2013 │ 616195 │ ██████████████████████████████▋ │
│ 2014 │ 724121 │ ████████████████████████████████████▏ │
│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
Something happened to home prices in 2020! But that is probably not a surprise...
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
Query:
```sql
SELECT
town,
@@ -240,124 +230,123 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100;
LIMIT 100
```
Result:
The result looks like:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
```response
┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
```
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speed by storing pre-aggregated data.
[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speeds by storing pre-aggregated data in whatever format you want. In this example, we create a projection that keeps track of the average price, total price, and count of properties grouped by the year, district and town. At execution time, ClickHouse will use your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
### Build a Projection {#build-projection}
Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`:
Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`:
```sql
ALTER TABLE uk_price_paid
@@ -374,25 +363,23 @@ ALTER TABLE uk_price_paid
toYear(date),
district,
town
);
)
```
Populate the projection for existing data (without it, the projection will be created only for newly inserted data):
Populate the projection for existing data. (Without materializing it, the projection will be created only for newly inserted data):
```sql
ALTER TABLE uk_price_paid
MATERIALIZE PROJECTION projection_by_year_district_town
SETTINGS mutations_sync = 1;
SETTINGS mutations_sync = 1
```
## Test Performance {#test-performance}
Let's run the same 3 queries.
Let's run the same 3 queries again:
### Query 1. Average Price Per Year {#average-price-projections}
Query:
```sql
SELECT
toYear(date) AS year,
@@ -400,47 +387,18 @@ SELECT
bar(price, 0, 1000000, 80)
FROM uk_price_paid
GROUP BY year
ORDER BY year ASC;
ORDER BY year ASC
```
Result:
```text
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85436 │ ██████▋ │
│ 1999 │ 96037 │ ███████▋ │
│ 2000 │ 107479 │ ████████▌ │
│ 2001 │ 118885 │ █████████▌ │
│ 2002 │ 137941 │ ███████████ │
│ 2003 │ 155889 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189351 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219378 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236109 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238367 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279915 │ ██████████████████████▍ │
│ 2015 │ 297266 │ ███████████████████████▋ │
│ 2016 │ 313201 │ █████████████████████████ │
│ 2017 │ 346097 │ ███████████████████████████▋ │
│ 2018 │ 350116 │ ████████████████████████████ │
│ 2019 │ 351013 │ ████████████████████████████ │
│ 2020 │ 369420 │ █████████████████████████████▌ │
│ 2021 │ 386903 │ ██████████████████████████████▊ │
└──────┴────────┴────────────────────────────────────────┘
The result is the same, but the performance is better!
```response
No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
```
### Query 2. Average Price Per Year in London {#average-price-london-projections}
Query:
```sql
SELECT
toYear(date) AS year,
@@ -449,48 +407,19 @@ SELECT
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC;
ORDER BY year ASC
```
Result:
Same result, but notice the improvement in query performance:
```text
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109116 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180637 │ █████████ │
│ 2000 │ 215838 │ ██████████▋ │
│ 2001 │ 232994 │ ███████████▋ │
│ 2002 │ 263670 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304666 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356191 │ █████████████████▋ │
│ 2007 │ 404054 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427753 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519442 │ █████████████████████████▊ │
│ 2013 │ 616212 │ ██████████████████████████████▋ │
│ 2014 │ 724154 │ ████████████████████████████████████▏ │
│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```response
No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
```
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
The condition (date >= '2020-01-01') needs to be modified to match the projection dimension (toYear(date) >= 2020).
Query:
The condition (date >= '2020-01-01') needs to be modified so that it matches the projection dimension (`toYear(date) >= 2020`):
```sql
SELECT
@@ -506,138 +435,16 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100;
LIMIT 100
```
Result:
Again, the result is the same but notice the improvement in query performance:
```text
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
```response
No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
```
### Summary {#summary}
All 3 queries work much faster and read fewer rows.
```text
Query 1
no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
Query 2
no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
Query 3
no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
```
### Test It in Playground {#playground}
### Test it in the Playground {#playground}
The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).


@@ -0,0 +1,26 @@
---
slug: /en/getting-started/example-datasets/
sidebar_position: 0
sidebar_label: Overview
keywords: [clickhouse, install, tutorial, sample, datasets]
pagination_next: 'en/tutorial'
---
# Tutorials and Example Datasets
We have a lot of resources for helping you get started and learn how ClickHouse works:
- If you need to get ClickHouse up and running, check out our [Quick Start](../quick-start.mdx)
- The [ClickHouse Tutorial](../tutorial.md) analyzes a dataset of New York City taxi rides
In addition, the sample datasets provide a great experience in working with ClickHouse,
learning important techniques and tricks, and seeing how to take advantage of the many powerful
functions in ClickHouse. The sample datasets include:
- The [UK Property Price Paid dataset](../getting-started/example-datasets/uk-price-paid.md) is a good starting point with some interesting SQL queries
- The [New York Taxi Data](../getting-started/example-datasets/nyc-taxi.md) has an example of how to insert data from S3 into ClickHouse
- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
View the **Tutorials and Datasets** menu for a complete list of sample datasets.


@@ -3,6 +3,7 @@ slug: /en/interfaces/cli
sidebar_position: 17
sidebar_label: Command-Line Client
---
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md';
# Command-line Client
@@ -24,26 +25,76 @@ Connected to ClickHouse server version 20.13.1 revision 54442.
Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server. When you try to use a client of an older version than the server, `clickhouse-client` displays the message:
```response
ClickHouse client version is older than ClickHouse server. It may lack support for new features.
ClickHouse client version is older than ClickHouse server.
It may lack support for new features.
```
## Usage {#cli_usage}
The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the query parameter, or send data to stdin (it verifies that stdin is not a terminal), or both. Similar to the HTTP interface, when using the query parameter and sending data to stdin, the request is a concatenation of the query parameter, a line feed, and the data in stdin. This is convenient for large INSERT queries.
The client can be used in interactive and non-interactive (batch) mode.
Example of using the client to insert data:
### Gather your connection details
<ConnectionDetails />
### Interactive
To connect to your ClickHouse Cloud service, or any ClickHouse server using TLS and passwords, interactively use `--secure`, port 9440, and provide your username and password:
```bash
clickhouse-client --host <HOSTNAME> \
--secure \
--port 9440 \
--user <USERNAME> \
--password <PASSWORD>
```
To connect to a self-managed ClickHouse server you will need the details for that server. Whether TLS is used, the port number, and the password are all configurable. Use the above example for ClickHouse Cloud as a starting point.
### Batch
To use batch mode, specify the query parameter, or send data to stdin (it verifies that stdin is not a terminal), or both. Similar to the HTTP interface, when using the query parameter and sending data to stdin, the request is a concatenation of the query parameter, a line feed, and the data in stdin. This is convenient for large INSERT queries.
Examples of using the client to insert data:
#### Inserting a CSV file into a remote ClickHouse service
This example is appropriate for ClickHouse Cloud, or any ClickHouse server using TLS and a password. In this example, a sample dataset CSV file, `cell_towers.csv`, is inserted into an existing table `cell_towers` in the `default` database:
```bash
clickhouse-client --host HOSTNAME.clickhouse.cloud \
--secure \
--port 9440 \
--user default \
--password PASSWORD \
--query "INSERT INTO cell_towers FORMAT CSVWithNames" \
< cell_towers.csv
```
:::note
To concentrate on the query syntax, the rest of the examples leave off the connection details (`--host`, `--port`, etc.). Add them in when you try the commands.
:::
#### Three different ways of inserting data
```bash
$ echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | \
clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```
$ cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```bash
cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
3, 'some text', '2016-08-14 00:00:00'
4, 'some more text', '2016-08-14 00:00:01'
_EOF
$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```
```bash
cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```
### Notes
In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query.
By default, you can only process a single query in batch mode. To make multiple queries from a “script,” use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run clickhouse-client for each query. Note that it may take tens of milliseconds to launch the clickhouse-client program.
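As a small illustration of both notes (connection options omitted, as in the examples above):
```bash
# Output defaults to TabSeparated; a FORMAT clause in the query overrides it
clickhouse-client --query "SELECT 1 AS x, 'hello' AS y FORMAT CSV"

# Run several semicolon-separated queries in one invocation
clickhouse-client --multiquery --query "SELECT 1; SELECT 2;"
```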


@@ -5,6 +5,9 @@ sidebar_label: ClickHouse Keeper
---
# ClickHouse Keeper
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
<SelfManaged />
ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper.


@@ -3,7 +3,11 @@ slug: /en/operations/external-authenticators/
sidebar_position: 48
sidebar_label: External User Authenticators and Directories
title: "External User Authenticators and Directories"
pagination_next: 'en/operations/external-authenticators/kerberos'
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
ClickHouse supports authenticating and managing users using external services.


@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/kerberos
---
# Kerberos
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
Existing and properly configured ClickHouse users can be authenticated via Kerberos authentication protocol.


@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/ldap
title: "LDAP"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:


@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/ssl-x509
title: "SSL X.509 certificate authentication"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation makes it possible to uniquely authenticate an incoming connection. The `Common Name` field of the certificate is used to identify the connected user. This makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.
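As a sketch, a user entry that maps certificates to a user might look like the following (`some_user` and the `Common Name` value are placeholders; consult the reference for your ClickHouse version for the exact element names):
```xml
<clickhouse>
    <users>
        <some_user>
            <!-- Certificates whose Common Name identifies this user -->
            <ssl_certificates>
                <common_name>host.example.com:some_user</common_name>
            </ssl_certificates>
        </some_user>
    </users>
</clickhouse>
```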


@@ -5,6 +5,9 @@ sidebar_label: Monitoring
---
# Monitoring
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
<SelfManaged />
You can monitor:


@@ -3,9 +3,12 @@ slug: /en/operations/optimizing-performance/sampling-query-profiler
sidebar_position: 54
sidebar_label: Query Profiling
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
# Sampling Query Profiler
<SelfManaged />
ClickHouse runs a sampling profiler that allows analyzing query execution. Using the profiler you can find source code routines that were used most frequently during query execution. You can trace CPU time and wall-clock time spent, including idle time.
To use profiler:


@@ -5,6 +5,10 @@ sidebar_label: Testing Hardware
title: "How to Test Your Hardware with ClickHouse"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
<SelfManaged />
You can run a basic ClickHouse performance test on any server without installing ClickHouse packages.

View File

@ -2,6 +2,7 @@
slug: /en/operations/server-configuration-parameters/
sidebar_position: 54
sidebar_label: Server Configuration Parameters
pagination_next: en/operations/server-configuration-parameters/settings
---
# Server Configuration Parameters

View File

@ -666,6 +666,7 @@ Keys:
- `http_proxy` - Configure HTTP proxy for sending crash reports.
- `debug` - Sets the Sentry client into debug mode.
- `tmp_path` - Filesystem path for temporary crash report state.
- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse.
**Recommended way to use**

View File

@ -2,6 +2,7 @@
sidebar_label: Settings
sidebar_position: 51
slug: /en/operations/settings/
pagination_next: en/operations/settings/settings
---
# Settings Overview

View File

@ -668,7 +668,7 @@ log_query_views=1
## log_formatted_queries {#settings-log-formatted-queries}
Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table.
Allows logging of formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates the `formatted_query` column of [system.query_log](../../operations/system-tables/query_log.md)).
Possible values:
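- 0 — Disabled.
- 1 — Enabled.

A hedged end-to-end sketch (assumes `system.query_log` is enabled):

```sql
SET log_formatted_queries = 1;
SELECT 1 AS x;
SYSTEM FLUSH LOGS;
SELECT query, formatted_query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 1;
```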

View File

@ -5,6 +5,9 @@ sidebar_label: Secured Communication with Zookeeper
---
# Optional secured communication between ClickHouse and Zookeeper
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
<SelfManaged />
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with the ClickHouse client over SSL. These options are available starting from ZooKeeper version 3.5.2.

View File

@ -4,6 +4,9 @@ sidebar_position: 58
sidebar_label: Usage Recommendations
title: "Usage Recommendations"
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
<SelfManaged />
## CPU Scaling Governor

View File

@ -294,6 +294,53 @@ Result:
Notice how only a portion of the data was properly decrypted; the rest is gibberish, since `mode`, `key`, or `iv` differed at encryption time.
## tryDecrypt
Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key.
**Examples**
Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, `iv` is an initial vector for decrypt/encrypt. Assume that users know their id and the key to decrypt the encrypted field:
```sql
CREATE TABLE decrypt_null (
dt DateTime,
user_id UInt32,
encrypted String,
iv String
) ENGINE = Memory;
```
Insert some data:
```sql
INSERT INTO decrypt_null VALUES
('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'),
('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'),
('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
```
Query:
```sql
SELECT
dt,
user_id,
tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value
FROM decrypt_null
ORDER BY user_id ASC
```
Result:
```
┌──────────────────dt─┬─user_id─┬─value──┐
│ 2022-08-02 00:00:00 │ 1 │ ᴺᵁᴸᴸ │
│ 2022-09-02 00:00:00 │ 2 │ value2 │
│ 2022-09-02 00:00:01 │ 3 │ ᴺᵁᴸᴸ │
└─────────────────────┴─────────┴────────┘
```
## aes_decrypt_mysql
Compatible with MySQL encryption: decrypts data encrypted with the [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
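A hedged round-trip sketch (the key is illustrative; 32 bytes for the `aes-256` modes):

```sql
SELECT aes_decrypt_mysql(
    'aes-256-ecb',
    aes_encrypt_mysql('aes-256-ecb', 'secret', 'keykeykeykeykeykeykeykeykeykey12'),
    'keykeykeykeykeykeykeykeykeykey12') AS plaintext;
```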

View File

@ -624,6 +624,7 @@ ClickHouse поддерживает динамическое изменение
- `http_proxy` - Configure an HTTP proxy for sending crash reports.
- `debug` - Put the Sentry client library into debug mode.
- `tmp_path` - Filesystem path for temporarily storing crash report state before it is sent to the Sentry server.
- `environment` - An arbitrary name of the environment in which the ClickHouse server is running; it will be mentioned in each crash report. The default value is `test` or `prod`, depending on the ClickHouse version.
**Recommended settings**

View File

@ -12,6 +12,7 @@
#include <string>
#include "Client.h"
#include "Core/Protocol.h"
#include "Parsers/formatAST.h"
#include <base/find_symbols.h>
@ -514,6 +515,66 @@ static bool queryHasWithClause(const IAST & ast)
return false;
}
std::optional<bool> Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query)
{
processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query);
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
// Sometimes you may get TOO_DEEP_RECURSION from the server,
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
{
have_error = false;
server_exception.reset();
client_exception.reset();
return true;
}
if (have_error)
{
fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message());
// Try to reconnect after errors, for two reasons:
// 1. We might not have realized that the server died, e.g. if
// it sent us a <Fatal> trace and closed connection properly.
// 2. The connection might have gotten into a wrong state and
// the next query will get false positive about
// "Unknown packet from server".
try
{
connection->forceConnected(connection_parameters.timeouts);
}
catch (...)
{
// Just report it, we'll terminate below.
fmt::print(stderr,
"Error while reconnecting to the server: {}\n",
getCurrentExceptionMessage(true));
// The reconnection might fail, but we'll still be connected
// in the sense of `connection->isConnected() = true`,
// in case when the requested database doesn't exist.
// Disconnect manually now, so that the following code doesn't
// have any doubts, and the connection state is predictable.
connection->disconnect();
}
}
if (!connection->isConnected())
{
// Probably the server is dead because we found an assertion
// failure. Fail fast.
fmt::print(stderr, "Lost connection to the server.\n");
// Print the changed settings because they might be needed to
// reproduce the error.
printChangedSettings();
return false;
}
return std::nullopt;
}
/// Returns false when server is not available.
bool Client::processWithFuzzing(const String & full_query)
@ -558,18 +619,33 @@ bool Client::processWithFuzzing(const String & full_query)
// - SET -- The time to fuzz the settings has not yet come
// (see comments in Client/QueryFuzzer.cpp)
size_t this_query_runs = query_fuzzer_runs;
if (orig_ast->as<ASTInsertQuery>() ||
orig_ast->as<ASTCreateQuery>() ||
orig_ast->as<ASTDropQuery>() ||
orig_ast->as<ASTSetQuery>())
ASTs queries_for_fuzzed_tables;
if (orig_ast->as<ASTSetQuery>())
{
this_query_runs = 1;
}
else if (const auto * create = orig_ast->as<ASTCreateQuery>())
{
if (QueryFuzzer::isSuitableForFuzzing(*create))
this_query_runs = create_query_fuzzer_runs;
else
this_query_runs = 1;
}
else if (const auto * insert = orig_ast->as<ASTInsertQuery>())
{
this_query_runs = 1;
queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query);
}
else if (const auto * drop = orig_ast->as<ASTDropQuery>())
{
this_query_runs = 1;
queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop);
}
String query_to_execute;
ASTPtr parsed_query;
ASTPtr fuzz_base = orig_ast;
for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step)
{
fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs);
@ -630,9 +706,9 @@ bool Client::processWithFuzzing(const String & full_query)
continue;
}
parsed_query = ast_to_process;
query_to_execute = parsed_query->formatForErrorMessage();
processParsedSingleQuery(full_query, query_to_execute, parsed_query);
query_to_execute = ast_to_process->formatForErrorMessage();
if (auto res = processFuzzingStep(query_to_execute, ast_to_process))
return *res;
}
catch (...)
{
@ -645,60 +721,6 @@ bool Client::processWithFuzzing(const String & full_query)
have_error = true;
}
const auto * exception = server_exception ? server_exception.get() : client_exception.get();
// Sometimes you may get TOO_DEEP_RECURSION from the server,
// and TOO_DEEP_RECURSION should not fail the fuzzer check.
if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
{
have_error = false;
server_exception.reset();
client_exception.reset();
return true;
}
if (have_error)
{
fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message());
// Try to reconnect after errors, for two reasons:
// 1. We might not have realized that the server died, e.g. if
// it sent us a <Fatal> trace and closed connection properly.
// 2. The connection might have gotten into a wrong state and
// the next query will get false positive about
// "Unknown packet from server".
try
{
connection->forceConnected(connection_parameters.timeouts);
}
catch (...)
{
// Just report it, we'll terminate below.
fmt::print(stderr,
"Error while reconnecting to the server: {}\n",
getCurrentExceptionMessage(true));
// The reconnection might fail, but we'll still be connected
// in the sense of `connection->isConnected() = true`,
// in case when the requested database doesn't exist.
// Disconnect manually now, so that the following code doesn't
// have any doubts, and the connection state is predictable.
connection->disconnect();
}
}
if (!connection->isConnected())
{
// Probably the server is dead because we found an assertion
// failure. Fail fast.
fmt::print(stderr, "Lost connection to the server.\n");
// Print the changed settings because they might be needed to
// reproduce the error.
printChangedSettings();
return false;
}
// Check that after the query is formatted, we can parse it back,
// format again and get the same result. Unfortunately, we can't
// compare the ASTs, which would be more sensitive to errors. This
@ -729,13 +751,12 @@ bool Client::processWithFuzzing(const String & full_query)
// query, but second and third.
// If you have to add any more workarounds to this check, just remove
// it altogether, it's not so useful.
if (parsed_query && !have_error && !queryHasWithClause(*parsed_query))
if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process))
{
ASTPtr ast_2;
try
{
const auto * tmp_pos = query_to_execute.c_str();
ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */);
}
catch (Exception & e)
@ -762,7 +783,7 @@ bool Client::processWithFuzzing(const String & full_query)
"Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
text_3, text_2);
fmt::print(stderr, "In more detail:\n");
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree());
fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree());
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute);
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree());
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2);
@ -784,6 +805,7 @@ bool Client::processWithFuzzing(const String & full_query)
// so that it doesn't influence the exit code.
server_exception.reset();
client_exception.reset();
fuzzer.notifyQueryFailed(ast_to_process);
have_error = false;
}
else if (ast_to_process->formatForErrorMessage().size() > 500)
@ -800,6 +822,35 @@ bool Client::processWithFuzzing(const String & full_query)
}
}
for (const auto & query : queries_for_fuzzed_tables)
{
std::cout << std::endl;
WriteBufferFromOStream ast_buf(std::cout, 4096);
formatAST(*query, ast_buf, false /*highlight*/);
ast_buf.next();
std::cout << std::endl << std::endl;
try
{
query_to_execute = query->formatForErrorMessage();
if (auto res = processFuzzingStep(query_to_execute, query))
return *res;
}
catch (...)
{
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
have_error = true;
}
if (have_error)
{
server_exception.reset();
client_exception.reset();
fuzzer.notifyQueryFailed(query);
have_error = false;
}
}
return true;
}
@ -834,6 +885,7 @@ void Client::addOptions(OptionsDescription & options_description)
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
("create-query-fuzzer-runs", po::value<int>()->default_value(0), "")
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)")
@ -994,6 +1046,17 @@ void Client::processOptions(const OptionsDescription & options_description,
ignore_error = true;
}
if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
config().setBool("ignore-error", true);
global_context->setSetting("allow_suspicious_low_cardinality_types", true);
ignore_error = true;
}
if (options.count("opentelemetry-traceparent"))
{
String traceparent = options["opentelemetry-traceparent"].as<std::string>();

View File

@ -17,6 +17,7 @@ public:
protected:
bool processWithFuzzing(const String & full_query) override;
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);
void connect() override;

View File

@ -79,7 +79,9 @@
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/filesystemHelpers.h>
#if USE_BORINGSSL
#include <Compression/CompressionCodecEncrypted.h>
#endif
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
#include <Server/CertificateReloader.h>
@ -1264,8 +1266,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
#if USE_BORINGSSL
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
#endif
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
@ -1418,8 +1421,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
global_context,
settings.async_insert_threads,
settings.async_insert_max_data_size,
AsynchronousInsertQueue::Timeout{.busy = settings.async_insert_busy_timeout_ms, .stale = settings.async_insert_stale_timeout_ms}));
settings.async_insert_cleanup_timeout_ms));
/// Size of cache for marks (index of MergeTree family of tables).
size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
@ -1471,9 +1473,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks);
global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks);
}
#if USE_BORINGSSL
/// Try to set up encryption. If there are errors in the config, an error will be printed and the server won't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
#endif
SCOPE_EXIT({
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because

View File

@ -32,7 +32,7 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
/// Sizes can be non-equal due to skipped NaNs.
const auto size = std::min(this->size_x, this->size_y);
const Float64 size = static_cast<Float64>(std::min(this->size_x, this->size_y));
/// Count d^2 sum
Float64 answer = 0;

View File

@ -247,7 +247,13 @@ add_object_library(clickhouse_access Access)
add_object_library(clickhouse_backups Backups)
add_object_library(clickhouse_core Core)
add_object_library(clickhouse_core_mysql Core/MySQL)
add_object_library(clickhouse_compression Compression)
if (NOT ENABLE_EXTERNAL_OPENSSL)
add_object_library(clickhouse_compression Compression)
else ()
add_headers_and_sources(dbms Compression)
list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h)
list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp)
endif ()
add_object_library(clickhouse_querypipeline QueryPipeline)
add_object_library(clickhouse_datatypes DataTypes)
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
@ -368,8 +374,6 @@ target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2_st)
target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::re2)
target_link_libraries(clickhouse_common_io
PRIVATE
${EXECINFO_LIBRARIES}
PUBLIC
boost::program_options
boost::system

View File

@ -251,6 +251,7 @@ protected:
QueryFuzzer fuzzer;
int query_fuzzer_runs = 0;
int create_query_fuzzer_runs = 0;
struct
{

View File

@ -1,4 +1,22 @@
#include "QueryFuzzer.h"
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/IDataType.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ParserDataType.h>
#include <Parsers/ParserInsertQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <unordered_set>
@ -430,6 +448,303 @@ void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
}
}
bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create)
{
return create.columns_list && create.columns_list->columns;
}
void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
{
if (create.columns_list && create.columns_list->columns)
{
for (auto & ast : create.columns_list->columns->children)
{
if (auto * column = ast->as<ASTColumnDeclaration>())
{
fuzzColumnDeclaration(*column);
}
}
}
if (create.storage && create.storage->engine)
{
/// Replace ReplicatedMergeTree with ordinary MergeTree
/// to avoid metadata inconsistency in ZooKeeper.
auto & engine_name = create.storage->engine->name;
if (startsWith(engine_name, "Replicated"))
{
engine_name = engine_name.substr(strlen("Replicated"));
if (auto & arguments = create.storage->engine->arguments)
{
auto & children = arguments->children;
if (children.size() <= 2)
arguments.reset();
else
children.erase(children.begin(), children.begin() + 2);
}
}
}
auto full_name = create.getTable();
auto original_name = full_name.substr(0, full_name.find("__fuzz_"));
size_t index = index_of_fuzzed_table[original_name]++;
auto new_name = original_name + "__fuzz_" + toString(index);
create.setTable(new_name);
SipHash sip_hash;
sip_hash.update(original_name);
if (create.columns_list)
create.columns_list->updateTreeHash(sip_hash);
if (create.storage)
create.storage->updateTreeHash(sip_hash);
IAST::Hash hash;
sip_hash.get128(hash);
/// Save only tables with unique definition.
if (created_tables_hashes.insert(hash).second)
original_table_name_to_fuzzed[original_name].insert(new_name);
}
void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column)
{
if (column.type)
{
auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type));
ParserDataType parser;
column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
}
}
DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
{
/// Do not replace Array/Tuple/etc. with a non-Array/Tuple type too often.
const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
if (type_array && fuzz_rand() % 4 != 0)
return std::make_shared<DataTypeArray>(fuzzDataType(type_array->getNestedType()));
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
if (type_tuple && fuzz_rand() % 4 != 0)
{
DataTypes elements;
for (const auto & element : type_tuple->getElements())
elements.push_back(fuzzDataType(element));
return type_tuple->haveExplicitNames()
? std::make_shared<DataTypeTuple>(elements, type_tuple->getElementNames())
: std::make_shared<DataTypeTuple>(elements);
}
const auto * type_map = typeid_cast<const DataTypeMap *>(type.get());
if (type_map && fuzz_rand() % 4 != 0)
{
auto key_type = fuzzDataType(type_map->getKeyType());
auto value_type = fuzzDataType(type_map->getValueType());
if (!DataTypeMap::checkKeyType(key_type))
key_type = type_map->getKeyType();
return std::make_shared<DataTypeMap>(key_type, value_type);
}
const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get());
if (type_nullable)
{
size_t tmp = fuzz_rand() % 3;
if (tmp == 0)
return fuzzDataType(type_nullable->getNestedType());
if (tmp == 1)
{
auto nested_type = fuzzDataType(type_nullable->getNestedType());
if (nested_type->canBeInsideNullable())
return std::make_shared<DataTypeNullable>(nested_type);
}
}
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get());
if (type_low_cardinality)
{
size_t tmp = fuzz_rand() % 3;
if (tmp == 0)
return fuzzDataType(type_low_cardinality->getDictionaryType());
if (tmp == 1)
{
auto nested_type = fuzzDataType(type_low_cardinality->getDictionaryType());
if (nested_type->canBeInsideLowCardinality())
return std::make_shared<DataTypeLowCardinality>(nested_type);
}
}
size_t tmp = fuzz_rand() % 8;
if (tmp == 0)
return std::make_shared<DataTypeArray>(type);
if (tmp <= 1 && type->canBeInsideNullable())
return std::make_shared<DataTypeNullable>(type);
if (tmp <= 2 && type->canBeInsideLowCardinality())
return std::make_shared<DataTypeLowCardinality>(type);
if (tmp <= 3)
return getRandomType();
return type;
}
DataTypePtr QueryFuzzer::getRandomType()
{
auto type_id = static_cast<TypeIndex>(fuzz_rand() % static_cast<size_t>(TypeIndex::Tuple) + 1);
if (type_id == TypeIndex::Tuple)
{
size_t tuple_size = fuzz_rand() % 6 + 1;
DataTypes elements;
for (size_t i = 0; i < tuple_size; ++i)
elements.push_back(getRandomType());
return std::make_shared<DataTypeTuple>(elements);
}
if (type_id == TypeIndex::Array)
return std::make_shared<DataTypeArray>(getRandomType());
/// NOLINTBEGIN(bugprone-macro-parentheses)
#define DISPATCH(DECIMAL) \
if (type_id == TypeIndex::DECIMAL) \
return std::make_shared<DataTypeDecimal<DECIMAL>>( \
DataTypeDecimal<DECIMAL>::maxPrecision(), \
(fuzz_rand() % DataTypeDecimal<DECIMAL>::maxPrecision()) + 1);
DISPATCH(Decimal32)
DISPATCH(Decimal64)
DISPATCH(Decimal128)
DISPATCH(Decimal256)
#undef DISPATCH
/// NOLINTEND(bugprone-macro-parentheses)
if (type_id == TypeIndex::FixedString)
return std::make_shared<DataTypeFixedString>(fuzz_rand() % 20);
if (type_id == TypeIndex::Enum8)
return std::make_shared<DataTypeUInt8>();
if (type_id == TypeIndex::Enum16)
return std::make_shared<DataTypeUInt16>();
return DataTypeFactory::instance().get(String(magic_enum::enum_name(type_id)));
}
void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
{
if (!table.database_and_table_name || fuzz_rand() % 3 == 0)
return;
const auto * identifier = table.database_and_table_name->as<ASTTableIdentifier>();
if (!identifier)
return;
auto table_id = identifier->getTableId();
if (table_id.empty())
return;
auto it = original_table_name_to_fuzzed.find(table_id.getTableName());
if (it != original_table_name_to_fuzzed.end() && !it->second.empty())
{
auto new_table_name = it->second.begin();
std::advance(new_table_name, fuzz_rand() % it->second.size());
StorageID new_table_id(table_id.database_name, *new_table_name);
table.database_and_table_name = std::make_shared<ASTTableIdentifier>(new_table_id);
}
}
static ASTPtr tryParseInsertQuery(const String & full_query)
{
const char * pos = full_query.data();
const char * end = full_query.data() + full_query.size();
ParserInsertQuery parser(end, false);
String message;
return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
}
ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query)
{
auto parsed_query = tryParseInsertQuery(full_query);
if (!parsed_query)
return {};
const auto & insert = *parsed_query->as<ASTInsertQuery>();
if (!insert.table)
return {};
auto table_name = insert.getTable();
auto it = original_table_name_to_fuzzed.find(table_name);
if (it == original_table_name_to_fuzzed.end())
return {};
ASTs queries;
for (const auto & fuzzed_name : it->second)
{
/// Parse the query from scratch for each table instead of cloning,
/// so that we store proper pointers to the inlined data,
/// which are not copied during clone.
auto & query = queries.emplace_back(tryParseInsertQuery(full_query));
query->as<ASTInsertQuery>()->setTable(fuzzed_name);
}
return queries;
}
ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
{
if (drop_query.kind != ASTDropQuery::Drop)
return {};
auto table_name = drop_query.getTable();
auto it = index_of_fuzzed_table.find(table_name);
if (it == index_of_fuzzed_table.end())
return {};
ASTs queries;
/// Drop all created tables, not only unique ones.
for (size_t i = 0; i < it->second; ++i)
{
auto fuzzed_name = table_name + "__fuzz_" + toString(i);
auto & query = queries.emplace_back(drop_query.clone());
query->as<ASTDropQuery>()->setTable(fuzzed_name);
/// Just in case add IF EXISTS to avoid exceptions.
query->as<ASTDropQuery>()->if_exists = true;
}
index_of_fuzzed_table.erase(it);
original_table_name_to_fuzzed.erase(table_name);
return queries;
}
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
{
auto remove_fuzzed_table = [this](const auto & table_name)
{
auto pos = table_name.find("__fuzz_");
if (pos != std::string::npos)
{
auto original_name = table_name.substr(0, pos);
original_table_name_to_fuzzed[original_name].erase(table_name);
}
};
if (const auto * create = ast->as<ASTCreateQuery>())
remove_fuzzed_table(create->getTable());
if (const auto * insert = ast->as<ASTInsertQuery>())
remove_fuzzed_table(insert->getTable());
}
void QueryFuzzer::fuzz(ASTs & asts)
{
for (auto & ast : asts)
@ -497,6 +812,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
}
else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
{
fuzzTableName(*table_expr);
fuzz(table_expr->children);
}
else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
@ -563,6 +879,10 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
literal->value = fuzzField(literal->value);
}
}
else if (auto * create_query = typeid_cast<ASTCreateQuery *>(ast.get()))
{
fuzzCreateQuery(*create_query);
}
else
{
fuzz(ast->children);

View File

@ -1,5 +1,6 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <unordered_set>
#include <unordered_map>
#include <vector>
@ -16,6 +17,11 @@ namespace DB
class ASTExpressionList;
class ASTOrderByElement;
class ASTCreateQuery;
class ASTInsertQuery;
class ASTColumnDeclaration;
class ASTDropQuery;
struct ASTTableExpression;
struct ASTWindowDefinition;
/*
@ -54,6 +60,9 @@ struct QueryFuzzer
std::unordered_set<const IAST *> debug_visited_nodes;
ASTPtr * debug_top_ast = nullptr;
std::unordered_map<std::string, std::unordered_set<std::string>> original_table_name_to_fuzzed;
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
std::set<IAST::Hash> created_tables_hashes;
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
@ -63,18 +72,28 @@ struct QueryFuzzer
Field getRandomField(int type);
Field fuzzField(Field field);
ASTPtr getRandomColumnLike();
DataTypePtr fuzzDataType(DataTypePtr type);
DataTypePtr getRandomType();
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
void fuzzOrderByElement(ASTOrderByElement * elem);
void fuzzOrderByList(IAST * ast);
void fuzzColumnLikeExpressionList(IAST * ast);
void fuzzWindowFrame(ASTWindowDefinition & def);
void fuzzCreateQuery(ASTCreateQuery & create);
void fuzzColumnDeclaration(ASTColumnDeclaration & column);
void fuzzTableName(ASTTableExpression & table);
void fuzz(ASTs & asts);
void fuzz(ASTPtr & ast);
void collectFuzzInfoMain(ASTPtr ast);
void addTableLike(ASTPtr ast);
void addColumnLike(ASTPtr ast);
void collectFuzzInfoRecurse(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
};
}

View File

@ -176,7 +176,9 @@ void registerCodecDelta(CompressionCodecFactory & factory);
void registerCodecT64(CompressionCodecFactory & factory);
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
void registerCodecGorilla(CompressionCodecFactory & factory);
#if USE_BORINGSSL
void registerCodecEncrypted(CompressionCodecFactory & factory);
#endif
void registerCodecFPC(CompressionCodecFactory & factory);
#endif
@ -193,7 +195,9 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecT64(*this);
registerCodecDoubleDelta(*this);
registerCodecGorilla(*this);
#if USE_BORINGSSL
registerCodecEncrypted(*this);
#endif
registerCodecFPC(*this);
#ifdef ENABLE_QPL_COMPRESSION
registerCodecDeflateQpl(*this);

View File

@ -596,9 +596,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, async_insert, false, "If true, data from INSERT query is stored in a queue and later flushed to the table in the background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, the INSERT query is processed almost instantly; otherwise the client will wait until the data is flushed to the table", 0) \
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
M(UInt64, async_insert_max_data_size, 100000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
M(UInt64, async_insert_max_data_size, 1000000, "Maximum size in bytes of unparsed data collected per query before being inserted", 0) \
M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \
M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \
M(Milliseconds, async_insert_cleanup_timeout_ms, 1000, "Time to wait before each iteration of cleaning up buffers for INSERT queries which don't appear anymore. Only has meaning at server startup.", 0) \
\
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
@ -671,6 +671,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
MAKE_OBSOLETE(M, Bool, allow_experimental_database_atomic, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_bigint_types, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \
MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \
MAKE_OBSOLETE(M, HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT) \
MAKE_OBSOLETE(M, Bool, database_replicated_ddl_output, true) \
MAKE_OBSOLETE(M, UInt64, replication_alter_columns_timeout, 60) \

View File

@ -22,3 +22,4 @@
#cmakedefine01 USE_ODBC
#cmakedefine01 USE_REPLXX
#cmakedefine01 USE_JEMALLOC
#cmakedefine01 USE_BORINGSSL

View File

@ -11,7 +11,7 @@ if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
endif()
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config ${EXECINFO_LIBRARIES})
target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_common_io clickhouse_common_config)
if (TARGET ch_contrib::sentry)
target_link_libraries (daemon PRIVATE ch_contrib::sentry dbms)

View File

@ -96,14 +96,14 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
}
sentry_options_set_dsn(options, endpoint.c_str());
sentry_options_set_database_path(options, temp_folder_path.c_str());
/// This value will be attached to each report
String environment_default_value = "test";
if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts"))
{
sentry_options_set_environment(options, "prod");
}
else
{
sentry_options_set_environment(options, "test");
}
environment_default_value = "prod";
/// If the value is set in config - use it
auto value = config.getString("send_crash_reports.environment", environment_default_value);
sentry_options_set_environment(options, value.c_str());
const std::string & http_proxy = config.getString("send_crash_reports.http_proxy", "");
if (!http_proxy.empty())

View File

@ -205,10 +205,9 @@ inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType &
if (!std::isfinite(value))
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Cannot convert infinity or NaN to decimal",
ErrorCodes::DECIMAL_OVERFLOW);
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name);
else
return false;
return ReturnType(false);
}
auto out = value * static_cast<FromFieldType>(DecimalUtils::scaleMultiplier<ToNativeType>(scale));
@ -217,8 +216,7 @@ inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType &
out >= static_cast<FromFieldType>(std::numeric_limits<ToNativeType>::max()))
{
if constexpr (throw_exception)
throw Exception(std::string(ToDataType::family_name) + " convert overflow. Float is out of Decimal range",
ErrorCodes::DECIMAL_OVERFLOW);
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name);
else
return ReturnType(false);
}

View File

@ -65,38 +65,61 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function<void()> func,
void WriteBufferFromAzureBlobStorage::finalizeImpl()
{
execWithRetry([this](){ next(); }, DEFAULT_RETRY_NUM);
if (tmp_buffer_write_offset > 0)
uploadBlock(tmp_buffer->data(), tmp_buffer_write_offset);
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM);
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
}
void WriteBufferFromAzureBlobStorage::uploadBlock(const char * data, size_t size)
{
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(data), size);
execWithRetry([&](){ block_blob_client.StageBlock(block_id, memory_stream); }, DEFAULT_RETRY_NUM);
tmp_buffer_write_offset = 0;
LOG_TRACE(log, "Staged block (id: {}) of size {} (blob path: {}).", block_id, size, blob_path);
}
WriteBufferFromAzureBlobStorage::MemoryBufferPtr WriteBufferFromAzureBlobStorage::allocateBuffer() const
{
return std::make_unique<Memory<>>(max_single_part_upload_size);
}
void WriteBufferFromAzureBlobStorage::nextImpl()
{
if (!offset())
size_t size_to_upload = offset();
if (size_to_upload == 0)
return;
char * buffer_begin = working_buffer.begin();
size_t total_size = offset();
if (!tmp_buffer)
tmp_buffer = allocateBuffer();
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
size_t current_size = 0;
std::vector<std::string> block_ids;
while (current_size < total_size)
size_t uploaded_size = 0;
while (uploaded_size != size_to_upload)
{
size_t part_len = std::min(total_size - current_size, max_single_part_upload_size);
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
size_t memory_buffer_remaining_size = max_single_part_upload_size - tmp_buffer_write_offset;
if (memory_buffer_remaining_size == 0)
uploadBlock(tmp_buffer->data(), tmp_buffer->size());
Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast<uint8_t *>(buffer_begin + current_size), part_len);
execWithRetry([&](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, DEFAULT_RETRY_NUM);
current_size += part_len;
LOG_TRACE(log, "Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path);
size_t size = std::min(memory_buffer_remaining_size, size_to_upload - uploaded_size);
memcpy(tmp_buffer->data() + tmp_buffer_write_offset, working_buffer.begin() + uploaded_size, size);
uploaded_size += size;
tmp_buffer_write_offset += size;
}
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM);
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
if (tmp_buffer_write_offset == max_single_part_upload_size)
uploadBlock(tmp_buffer->data(), tmp_buffer->size());
if (write_settings.remote_throttler)
write_settings.remote_throttler->add(total_size);
write_settings.remote_throttler->add(size_to_upload);
}
}

View File

@ -40,6 +40,7 @@ public:
private:
void finalizeImpl() override;
void execWithRetry(std::function<void()> func, size_t num_tries);
void uploadBlock(const char * data, size_t size);
Poco::Logger * log;
@ -48,6 +49,13 @@ private:
const WriteSettings write_settings;
AzureClientPtr blob_container_client;
std::vector<std::string> block_ids;
using MemoryBufferPtr = std::unique_ptr<Memory<>>;
MemoryBufferPtr tmp_buffer;
size_t tmp_buffer_write_offset = 0;
MemoryBufferPtr allocateBuffer() const;
};
}

View File

@ -1,4 +1,5 @@
#include <Functions/FunctionsAES.h>
#include <Interpreters/Context.h>
#if USE_SSL
@ -8,7 +9,6 @@
#include <string>
#include <cassert>
namespace DB
{
namespace ErrorCodes

View File

@ -1,6 +1,9 @@
#pragma once
#include <Common/config.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeNullable.h>
#if USE_SSL
#include <DataTypes/DataTypeString.h>
@ -20,7 +23,6 @@
#include <string.h>
namespace DB
{
namespace ErrorCodes
@ -411,6 +413,7 @@ class FunctionDecrypt : public IFunction
public:
static constexpr OpenSSLDetails::CompatibilityMode compatibility_mode = Impl::compatibility_mode;
static constexpr auto name = Impl::name;
static constexpr bool use_null_when_decrypt_fail = Impl::use_null_when_decrypt_fail;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionDecrypt>(); }
private:
@ -445,6 +448,9 @@ private:
optional_args
);
if constexpr (use_null_when_decrypt_fail)
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
return std::make_shared<DataTypeString>();
}
@ -468,7 +474,7 @@ private:
ColumnPtr result_column;
if (arguments.size() <= 3)
{
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr);
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, nullptr, nullptr);
}
else
{
@ -478,7 +484,7 @@ private:
if (arguments.size() <= 4)
{
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr);
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, nullptr);
}
else
{
@ -486,13 +492,13 @@ private:
throw Exception("AAD can be only set for GCM-mode", ErrorCodes::BAD_ARGUMENTS);
const auto aad_column = arguments[4].column;
result_column = doDecrypt(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
result_column = doDecrypt<use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
}
}
return result_column;
}
template<bool use_null_when_decrypt_fail>
static ColumnPtr doDecrypt(
const EVP_CIPHER * evp_cipher,
size_t input_rows_count,
@ -503,25 +509,25 @@ private:
{
if constexpr (compatibility_mode == OpenSSLDetails::CompatibilityMode::MySQL)
{
return doDecryptImpl<CipherMode::MySQLCompatibility>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
return doDecryptImpl<CipherMode::MySQLCompatibility, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
}
else
{
const auto cipher_mode = EVP_CIPHER_mode(evp_cipher);
if (cipher_mode == EVP_CIPH_GCM_MODE)
{
return doDecryptImpl<CipherMode::RFC5116_AEAD_AES_GCM>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
return doDecryptImpl<CipherMode::RFC5116_AEAD_AES_GCM, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
}
else
{
return doDecryptImpl<CipherMode::OpenSSLCompatibility>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
return doDecryptImpl<CipherMode::OpenSSLCompatibility, use_null_when_decrypt_fail>(evp_cipher, input_rows_count, input_column, key_column, iv_column, aad_column);
}
}
return nullptr;
}
template <CipherMode mode>
template <CipherMode mode, bool use_null_when_decrypt_fail>
static ColumnPtr doDecryptImpl(const EVP_CIPHER * evp_cipher,
size_t input_rows_count,
const ColumnPtr & input_column,
@ -541,6 +547,7 @@ private:
static constexpr size_t tag_size = 16; // https://tools.ietf.org/html/rfc5116#section-5.1
auto decrypted_result_column = ColumnString::create();
auto null_map = ColumnUInt8::create();
auto & decrypted_result_column_data = decrypted_result_column->getChars();
auto & decrypted_result_column_offsets = decrypted_result_column->getOffsets();
@ -616,6 +623,7 @@ private:
}
}
bool decrypt_fail = false;
/// Avoid extra work on empty ciphertext/plaintext. Always decrypt empty to empty.
/// This makes sense for default implementation for NULLs.
if (input_value.size > 0)
@ -662,9 +670,14 @@ private:
if (EVP_DecryptUpdate(evp_ctx,
reinterpret_cast<unsigned char*>(decrypted), &output_len,
reinterpret_cast<const unsigned char*>(input_value.data), static_cast<int>(input_value.size)) != 1)
{
if constexpr (!use_null_when_decrypt_fail)
onError("Failed to decrypt");
decrypt_fail = true;
}
else
{
decrypted += output_len;
// 3: optionally get tag from the ciphertext (RFC5116) and feed it to the context
if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
{
@ -674,16 +687,29 @@ private:
}
// 4: retrieve encrypted data (ciphertext)
if (EVP_DecryptFinal_ex(evp_ctx,
if (!decrypt_fail && EVP_DecryptFinal_ex(evp_ctx,
reinterpret_cast<unsigned char*>(decrypted), &output_len) != 1)
{
if constexpr (!use_null_when_decrypt_fail)
onError("Failed to decrypt");
decrypt_fail = true;
}
else
decrypted += output_len;
}
}
*decrypted = '\0';
++decrypted;
decrypted_result_column_offsets.push_back(decrypted - decrypted_result_column_data.data());
if constexpr (use_null_when_decrypt_fail)
{
if (decrypt_fail)
null_map->insertValue(1);
else
null_map->insertValue(0);
}
}
@ -694,6 +720,9 @@ private:
}
decrypted_result_column->validate();
if constexpr (use_null_when_decrypt_fail)
return ColumnNullable::create(std::move(decrypted_result_column), std::move(null_map));
else
return decrypted_result_column;
}
};

View File

@ -323,13 +323,13 @@ struct ToDateTimeImpl
{
static constexpr auto name = "toDateTime";
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
return date_time <= 0xffffffff ? UInt32(date_time) : UInt32(0xffffffff);
}
static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
static UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
{
if (d < 0)
return 0;
@ -338,12 +338,12 @@ struct ToDateTimeImpl
return date_time <= 0xffffffff ? date_time : 0xffffffff;
}
static inline UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/)
static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/)
{
return dt;
}
static inline UInt32 execute(Int64 d, const DateLUTImpl & time_zone)
static UInt32 execute(Int64 d, const DateLUTImpl & time_zone)
{
if (d < 0)
return 0;
@ -352,7 +352,7 @@ struct ToDateTimeImpl
return date_time <= 0xffffffff ? date_time : 0xffffffff;
}
static inline UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & /*time_zone*/)
static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & /*time_zone*/)
{
if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
return 0;
@ -374,7 +374,7 @@ struct ToDateTransform32Or64
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
// Since we are converting to Date, there is no need for values outside of the default LUT range.
if (from < 0)
@ -391,7 +391,7 @@ struct ToDateTransform32Or64Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
// TODO: decide narrow or extended range based on FromType
/// The function should be monotonic (better for query optimizations), so we saturate instead of overflow.
@ -413,7 +413,7 @@ struct ToDateTransform8Or16Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0)
return 0;
@ -431,7 +431,7 @@ struct ToDate32Transform32Or64
{
static constexpr auto name = "toDate32";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
return (from < DATE_LUT_MAX_EXTEND_DAY_NUM)
? from
@ -444,7 +444,7 @@ struct ToDate32Transform32Or64Signed
{
static constexpr auto name = "toDate32";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
static const Int32 daynum_min_offset = -static_cast<Int32>(DateLUT::instance().getDayNumOffsetEpoch());
if (from < daynum_min_offset)
@ -460,7 +460,7 @@ struct ToDate32Transform8Or16Signed
{
static constexpr auto name = "toDate32";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
return from;
}
@ -529,7 +529,7 @@ struct ToDateTimeTransform64
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
return std::min<Int64>(Int64(from), Int64(0xFFFFFFFF));
}
@ -540,7 +540,7 @@ struct ToDateTimeTransformSigned
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0)
return 0;
@ -553,7 +553,7 @@ struct ToDateTimeTransform64Signed
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & /* time_zone */)
static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & /* time_zone */)
{
if (from < 0)
return 0;
@ -581,9 +581,9 @@ template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime, N
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime, ToDateTimeTransform64Signed<Float64, UInt32>> {};
const time_t LUT_MIN_TIME = -2208988800l; // 1900-01-01 UTC
constexpr time_t LUT_MIN_TIME = -2208988800l; // 1900-01-01 UTC
const time_t LUT_MAX_TIME = 10413791999l; // 2299-12-31 UTC
constexpr time_t LUT_MAX_TIME = 10413791999l; // 2299-12-31 UTC
/** Conversion of numeric to DateTime64
*/
@ -599,7 +599,7 @@ struct ToDateTime64TransformUnsigned
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
{}
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
{
from = std::min<time_t>(from, LUT_MAX_TIME);
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(from, 0, scale_multiplier);
@ -616,7 +616,7 @@ struct ToDateTime64TransformSigned
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
{}
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
{
from = std::max<time_t>(from, LUT_MIN_TIME);
from = std::min<time_t>(from, LUT_MAX_TIME);
@ -634,11 +634,10 @@ struct ToDateTime64TransformFloat
: scale(scale_)
{}
inline NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
{
if (from < 0)
return 0;
from = std::min<FromType>(from, FromType(0xFFFFFFFF));
from = std::max(from, static_cast<FromType>(LUT_MIN_TIME));
from = std::min(from, static_cast<FromType>(LUT_MAX_TIME));
return convertToDecimal<FromDataType, DataTypeDateTime64>(from, scale);
}
};
@ -672,7 +671,7 @@ struct FromDateTime64Transform
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
{}
inline auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const
auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const
{
const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier);
return Transform::execute(static_cast<UInt32>(c.whole), time_zone);
@ -694,19 +693,19 @@ struct ToDateTime64Transform
: scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
{}
inline DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
{
const auto dt = ToDateTimeImpl::execute(d, time_zone);
return execute(dt, time_zone);
}
inline DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const
DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const
{
const auto dt = time_zone.fromDayNum(ExtendedDayNum(d));
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
}
inline DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
{
return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
}

View File

@ -12,6 +12,7 @@ struct DecryptMySQLModeImpl
{
static constexpr auto name = "aes_decrypt_mysql";
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::MySQL;
static constexpr bool use_null_when_decrypt_fail = false;
};
}

View File

@ -12,6 +12,7 @@ struct DecryptImpl
{
static constexpr auto name = "decrypt";
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL;
static constexpr bool use_null_when_decrypt_fail = false;
};
}

View File

@ -0,0 +1,32 @@
#include <Common/Documentation.h>
#include <Common/config.h>
#if USE_SSL
# include <Functions/FunctionFactory.h>
# include <Functions/FunctionsAES.h>
namespace
{
struct TryDecryptImpl
{
static constexpr auto name = "tryDecrypt";
static constexpr auto compatibility_mode = OpenSSLDetails::CompatibilityMode::OpenSSL;
static constexpr bool use_null_when_decrypt_fail = true;
};
}
namespace DB
{
REGISTER_FUNCTION(TryDecrypt)
{
factory.registerFunction<FunctionDecrypt<TryDecryptImpl>>(Documentation(
"Similar to `decrypt`, but returns NULL if decryption fails because of using the wrong key."));
}
}
#endif

View File

@ -120,11 +120,9 @@ std::exception_ptr AsynchronousInsertQueue::InsertData::Entry::getException() co
}
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size_, const Timeout & timeouts)
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout_)
: WithContext(context_)
, max_data_size(max_data_size_)
, busy_timeout(timeouts.busy)
, stale_timeout(timeouts.stale)
, cleanup_timeout(cleanup_timeout_)
, pool(pool_size)
, dump_by_first_update_thread(&AsynchronousInsertQueue::busyCheck, this)
, cleanup_thread(&AsynchronousInsertQueue::cleanup, this)
@ -132,9 +130,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
using namespace std::chrono;
assert(pool_size);
if (stale_timeout > 0ms)
dump_by_last_update_thread = ThreadFromGlobalPool(&AsynchronousInsertQueue::staleCheck, this);
}
AsynchronousInsertQueue::~AsynchronousInsertQueue()
@ -143,10 +138,14 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
{
std::lock_guard lock(shutdown_mutex);
shutdown = true;
shutdown_cv.notify_all();
{
std::lock_guard lock(deadline_mutex);
are_tasks_available.notify_one();
}
{
std::lock_guard lock(cleanup_mutex);
cleanup_can_run.notify_one();
}
assert(dump_by_first_update_thread.joinable());
@ -155,9 +154,6 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
assert(cleanup_thread.joinable());
cleanup_thread.join();
if (dump_by_last_update_thread.joinable())
dump_by_last_update_thread.join();
pool.wait();
std::lock_guard lock(currently_processing_mutex);
@ -234,12 +230,18 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
std::lock_guard data_lock(data_mutex);
if (!data)
data = std::make_unique<InsertData>();
{
auto now = std::chrono::steady_clock::now();
data = std::make_unique<InsertData>(now);
std::lock_guard lock(deadline_mutex);
deadline_queue.insert({now + Milliseconds{it->first.settings.async_insert_busy_timeout_ms}, it});
are_tasks_available.notify_one();
}
size_t entry_data_size = entry->bytes.size();
data->size += entry_data_size;
data->last_update = std::chrono::steady_clock::now();
data->entries.emplace_back(entry);
{
@ -250,7 +252,10 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
data->entries.size(), data->size, queryToString(it->first.query));
if (data->size > max_data_size)
/// Here we check whether we hit the limit on the maximum data size in the buffer.
/// Note that we use the setting from the query context.
/// This works because queries with the same set of settings are already grouped together.
if (data->size > it->first.settings.async_insert_max_data_size)
scheduleDataProcessingJob(it->first, std::move(data), getContext());
CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert);
@ -282,43 +287,45 @@ void AsynchronousInsertQueue::waitForProcessingQuery(const String & query_id, co
void AsynchronousInsertQueue::busyCheck()
{
auto timeout = busy_timeout;
while (!waitForShutdown(timeout))
while (!shutdown)
{
/// TODO: use priority queue instead of raw unsorted queue.
timeout = busy_timeout;
std::vector<QueueIterator> entries_to_flush;
{
std::unique_lock deadline_lock(deadline_mutex);
are_tasks_available.wait_for(deadline_lock, Milliseconds(getContext()->getSettingsRef().async_insert_busy_timeout_ms), [this]()
{
if (shutdown)
return true;
if (!deadline_queue.empty() && deadline_queue.begin()->first < std::chrono::steady_clock::now())
return true;
return false;
});
if (shutdown)
return;
const auto now = std::chrono::steady_clock::now();
while (true)
{
if (deadline_queue.empty() || deadline_queue.begin()->first > now)
break;
entries_to_flush.emplace_back(deadline_queue.begin()->second);
deadline_queue.erase(deadline_queue.begin());
}
}
std::shared_lock read_lock(rwlock);
for (auto & [key, elem] : queue)
for (auto & entry : entries_to_flush)
{
auto & [key, elem] = *entry;
std::lock_guard data_lock(elem->mutex);
if (!elem->data)
continue;
auto lag = std::chrono::steady_clock::now() - elem->data->first_update;
if (lag >= busy_timeout)
scheduleDataProcessingJob(key, std::move(elem->data), getContext());
else
timeout = std::min(timeout, std::chrono::ceil<std::chrono::milliseconds>(busy_timeout - lag));
}
}
}
void AsynchronousInsertQueue::staleCheck()
{
while (!waitForShutdown(stale_timeout))
{
std::shared_lock read_lock(rwlock);
for (auto & [key, elem] : queue)
{
std::lock_guard data_lock(elem->mutex);
if (!elem->data)
continue;
auto lag = std::chrono::steady_clock::now() - elem->data->last_update;
if (lag >= stale_timeout)
scheduleDataProcessingJob(key, std::move(elem->data), getContext());
}
}
@ -326,12 +333,16 @@ void AsynchronousInsertQueue::staleCheck()
void AsynchronousInsertQueue::cleanup()
{
/// Do not run cleanup too often,
/// because it holds exclusive lock.
auto timeout = busy_timeout * 5;
while (!waitForShutdown(timeout))
while (true)
{
{
std::unique_lock cleanup_lock(cleanup_mutex);
cleanup_can_run.wait_for(cleanup_lock, Milliseconds(cleanup_timeout), [this]() -> bool { return shutdown; });
if (shutdown)
return;
}
std::vector<InsertQuery> keys_to_remove;
{
@ -383,11 +394,6 @@ void AsynchronousInsertQueue::cleanup()
}
}
bool AsynchronousInsertQueue::waitForShutdown(const Milliseconds & timeout)
{
std::unique_lock shutdown_lock(shutdown_mutex);
return shutdown_cv.wait_for(shutdown_lock, timeout, [this]() { return shutdown; });
}
// static
void AsynchronousInsertQueue::processData(InsertQuery key, InsertDataPtr data, ContextPtr global_context)
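For orientation, here is a minimal usage sketch of the per-query settings the new deadline queue is keyed on. The table name and the values are illustrative; only the setting names (async_insert_busy_timeout_ms, async_insert_max_data_size) come from the code above.

CREATE TABLE async_demo (id UInt64, s String) ENGINE = MergeTree ORDER BY id;

-- Queries with an identical set of settings share one queue entry. Its flush
-- deadline is "first insert time + async_insert_busy_timeout_ms", and it is
-- flushed early once the buffer exceeds async_insert_max_data_size bytes.
INSERT INTO async_demo
SETTINGS async_insert = 1, wait_for_async_insert = 0,
         async_insert_busy_timeout_ms = 200, async_insert_max_data_size = 1000000
VALUES (1, 'a');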

View File

@ -5,6 +5,7 @@
#include <Core/Settings.h>
#include <Poco/Logger.h>
#include <atomic>
#include <unordered_map>
@ -18,14 +19,7 @@ class AsynchronousInsertQueue : public WithContext
public:
using Milliseconds = std::chrono::milliseconds;
/// Using a structure to benefit from designated initialization and to avoid messing with positional arguments in the ctor.
struct Timeout
{
Milliseconds busy;
Milliseconds stale;
};
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, size_t max_data_size, const Timeout & timeouts);
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size, Milliseconds cleanup_timeout);
~AsynchronousInsertQueue();
void push(ASTPtr query, ContextPtr query_context);
@ -69,6 +63,10 @@ private:
std::exception_ptr exception;
};
explicit InsertData(std::chrono::steady_clock::time_point now)
: first_update(now)
{}
using EntryPtr = std::shared_ptr<Entry>;
std::list<EntryPtr> entries;
@ -76,11 +74,7 @@ private:
/// Timestamp of the first insert into the queue, or after the last queue dump.
/// Used to track how long the queue has been active, so we can dump it by timer.
std::chrono::time_point<std::chrono::steady_clock> first_update = std::chrono::steady_clock::now();
/// Timestamp of the last insert into the queue.
/// Used to track how long the queue has been stale, so we can dump it by another timer.
std::chrono::time_point<std::chrono::steady_clock> last_update;
std::chrono::time_point<std::chrono::steady_clock> first_update;
};
using InsertDataPtr = std::unique_ptr<InsertData>;
@ -96,10 +90,21 @@ private:
using Queue = std::unordered_map<InsertQuery, std::shared_ptr<Container>, InsertQuery::Hash>;
using QueueIterator = Queue::iterator;
/// Ordered container: maps each flush deadline to its queue entry.
using DeadlineQueue = std::map<std::chrono::steady_clock::time_point, QueueIterator>;
mutable std::shared_mutex rwlock;
Queue queue;
/// Needed only inside the cleanup() function, and for correct signaling about shutdown.
mutable std::mutex cleanup_mutex;
mutable std::condition_variable cleanup_can_run;
mutable std::mutex deadline_mutex;
mutable std::condition_variable are_tasks_available;
DeadlineQueue deadline_queue;
using QueryIdToEntry = std::unordered_map<String, InsertData::EntryPtr>;
mutable std::mutex currently_processing_mutex;
QueryIdToEntry currently_processing_queries;
@ -109,25 +114,21 @@ private:
/// grow for a long period of time and users will be able to select new data in deterministic manner.
/// - stale_timeout: if queue is stale for too long, then we dump the data too, so that users will be able to select the last
/// piece of inserted data.
/// - max_data_size: if the maximum size of data is reached, then again we dump the data.
///
/// While processing incoming INSERT queries we also check whether the maximum size of data in the buffer has been reached (the async_insert_max_data_size setting).
/// If so, we dump the data as well.
const size_t max_data_size; /// in bytes
const Milliseconds busy_timeout;
const Milliseconds stale_timeout;
const Milliseconds cleanup_timeout;
std::mutex shutdown_mutex;
std::condition_variable shutdown_cv;
bool shutdown{false};
std::atomic<bool> shutdown{false};
ThreadPool pool; /// dump the data only inside this pool.
ThreadFromGlobalPool dump_by_first_update_thread; /// uses busy_timeout and busyCheck()
ThreadFromGlobalPool dump_by_last_update_thread; /// uses stale_timeout and staleCheck()
ThreadFromGlobalPool cleanup_thread; /// uses busy_timeout and cleanup()
Poco::Logger * log = &Poco::Logger::get("AsynchronousInsertQueue");
void busyCheck();
void staleCheck();
void cleanup();
/// Should be called with shared or exclusively locked 'rwlock'.

View File

@ -30,6 +30,7 @@ public:
{
auto res = std::make_shared<ASTExplainQuery>(*this);
res->children.clear();
if (!children.empty())
res->children.push_back(children[0]->clone());
cloneOutputOptions(*res);
return res;

View File

@ -27,6 +27,11 @@
#include <Parsers/ASTWindowDefinition.h>
#include <Parsers/ASTAssignment.h>
#include <Parsers/ASTColumnsMatcher.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Parsers/parseIntervalKind.h>
@ -36,6 +41,7 @@
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ParserExplainQuery.h>
#include <Parsers/queryToString.h>
@ -52,25 +58,84 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
/*
* Build an AST with the following structure:
*
* ```
* SelectWithUnionQuery (children 1)
* ExpressionList (children 1)
* SelectQuery (children 2)
* ExpressionList (children 1)
* Asterisk
* TablesInSelectQuery (children 1)
* TablesInSelectQueryElement (children 1)
* TableExpression (children 1)
* Function <...>
* ```
*/
static ASTPtr buildSelectFromTableFunction(const std::shared_ptr<ASTFunction> & ast_function)
{
auto result_select_query = std::make_shared<ASTSelectWithUnionQuery>();
{
auto select_ast = std::make_shared<ASTSelectQuery>();
select_ast->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
select_ast->select()->children.push_back(std::make_shared<ASTAsterisk>());
auto list_of_selects = std::make_shared<ASTExpressionList>();
list_of_selects->children.push_back(select_ast);
result_select_query->children.push_back(std::move(list_of_selects));
result_select_query->list_of_selects = result_select_query->children.back();
{
auto tables = std::make_shared<ASTTablesInSelectQuery>();
select_ast->setExpression(ASTSelectQuery::Expression::TABLES, tables);
auto tables_elem = std::make_shared<ASTTablesInSelectQueryElement>();
auto table_expr = std::make_shared<ASTTableExpression>();
tables->children.push_back(tables_elem);
tables_elem->table_expression = table_expr;
tables_elem->children.push_back(table_expr);
table_expr->table_function = ast_function;
table_expr->children.push_back(table_expr->table_function);
}
}
return result_select_query;
}
bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr select_node;
ParserSelectWithUnionQuery select;
ParserExplainQuery explain;
if (pos->type != TokenType::OpeningRoundBracket)
return false;
++pos;
if (!select.parse(pos, select_node, expected))
ASTPtr result_node = nullptr;
if (ASTPtr select_node; select.parse(pos, select_node, expected))
{
result_node = std::move(select_node);
}
else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected))
{
/// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...)
result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node));
}
else
{
return false;
}
if (pos->type != TokenType::ClosingRoundBracket)
return false;
++pos;
node = std::make_shared<ASTSubquery>();
node->children.push_back(select_node);
node->children.push_back(result_node);
return true;
}
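To make the control flow above concrete, here is what the rewrite does, expressed as queries (the second one is the example used for the function's registration later in this diff):

-- Parsed as if written: SELECT * FROM viewExplain(EXPLAIN AST SELECT 1)
SELECT * FROM (EXPLAIN AST SELECT 1);

-- The EXPLAIN output then behaves like an ordinary table:
SELECT explain FROM (EXPLAIN AST SELECT * FROM system.numbers) WHERE explain LIKE '%Asterisk%';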

View File

@ -88,11 +88,20 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
{
/// Nothing to parse
}
else if (select_only)
{
if (select_p.parse(pos, query, expected))
explain_query->setExplainedQuery(std::move(query));
else
return false;
}
else if (select_p.parse(pos, query, expected) ||
create_p.parse(pos, query, expected) ||
insert_p.parse(pos, query, expected) ||
system_p.parse(pos, query, expected))
{
explain_query->setExplainedQuery(std::move(query));
}
else
return false;

View File

@ -11,6 +11,7 @@ class ParserExplainQuery : public IParserBase
protected:
const char * end;
bool allow_settings_after_format_in_insert;
bool select_only;
const char * getName() const override { return "EXPLAIN"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
@ -18,7 +19,13 @@ public:
explicit ParserExplainQuery(const char* end_, bool allow_settings_after_format_in_insert_)
: end(end_)
, allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_)
, select_only(false)
{}
explicit ParserExplainQuery()
: end(nullptr) , allow_settings_after_format_in_insert(false) , select_only(true)
{}
};
}
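Since ParserSubquery above instantiates the parameterless constructor, only EXPLAINed SELECT queries are accepted inside a subquery. An illustrative pair of queries, assuming that wiring:

-- Accepted: the explained query is a SELECT.
SELECT * FROM (EXPLAIN SYNTAX SELECT 1);

-- Rejected by the select_only parser: the explained query is not a SELECT.
SELECT * FROM (EXPLAIN AST CREATE TABLE t (x UInt8) ENGINE = Memory); -- syntax error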

View File

@ -1241,19 +1241,18 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
for (const auto & disk_ptr : disks)
defined_disk_names.insert(disk_ptr->getName());
for (const auto & [_, disk_ptr] : getContext()->getDisksMap())
for (const auto & [disk_name, disk_ptr] : getContext()->getDisksMap())
{
/// In a composable cache with an underlying source disk there might be the following structure:
/// DiskObjectStorage(CachedObjectStorage(...(CachedObjectStorage(ObjectStorage)...)))
/// In the configuration file each of these layers has a different name, but the data path
/// (the getPath() result) is the same. We need to take that into account here.
if (disk_ptr->supportsCache())
{
if (defined_disk_names.contains(disk_ptr->getName()))
if (disk_ptr->supportsCache() && defined_disk_names.contains(disk_ptr->getName()))
{
auto caches = disk_ptr->getCacheLayersNames();
disk_names_wrapped_in_cache.insert(caches.begin(), caches.end());
}
LOG_TEST(log, "Cache layers for cache disk `{}`, inner disk `{}`: {}",
disk_name, disk_ptr->getName(), fmt::join(caches, ", "));
}
}
@ -1272,8 +1271,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
{
throw Exception(
ErrorCodes::UNKNOWN_DISK,
"Part {} ({}) was found on disk {} which is not defined in the storage policy",
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name));
"Part {} ({}) was found on disk {} which is not defined in the storage policy (defined disks: {}, wrapped disks: {})",
backQuote(it->name()), backQuote(it->path()), backQuote(disk_name),
fmt::join(defined_disk_names, ", "), fmt::join(disk_names_wrapped_in_cache, ", "));
}
}
}

View File

@ -542,7 +542,10 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
{
out.function = RPNElement::FUNCTION_HAS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
if (converted_field.isNull())
return false;
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
}
}
@ -565,7 +568,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType()))
return false;
mutable_column->insert(convertFieldToType(f, *actual_type, value_type.get()));
auto converted = convertFieldToType(f, *actual_type);
if (converted.isNull())
return false;
mutable_column->insert(converted);
}
column = std::move(mutable_column);
@ -583,7 +590,10 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type);
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
if (converted_field.isNull())
return false;
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
}
@ -611,9 +621,11 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals(
out.function = RPNElement::FUNCTION_HAS;
const DataTypePtr actual_type = BloomFilter::getPrimitiveType(array_type->getNestedType());
Field converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
auto converted_field = convertFieldToType(value_field, *actual_type, value_type.get());
if (converted_field.isNull())
return false;
out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field)));
return true;
}
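The net effect of the added isNull() checks: a constant that cannot be converted to the indexed type now disables the bloom-filter condition instead of hashing a mangled value. A hypothetical sketch (the table and data are illustrative):

CREATE TABLE bf_demo
(
    id UInt64,
    arr Array(UInt8),
    INDEX bf arr TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree ORDER BY id;

-- 300 does not fit into UInt8, so convertFieldToType() yields Null and the
-- condition no longer feeds the bloom-filter index; the query simply scans.
SELECT * FROM bf_demo WHERE has(arr, 300);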

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
namespace
{
constexpr auto retry_period_ms = 10 * 1000;
constexpr auto retry_period_ms = 1000;
}
/// Used to check whether it's us who set node `is_active`, or not.

View File

@ -265,7 +265,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (max_num_params == 0)
msg += "no parameters";
if (min_num_params == max_num_params)
else if (min_num_params == max_num_params)
msg += fmt::format("{} parameters: {}", min_num_params, needed_params);
else
msg += fmt::format("{} to {} parameters: {}", min_num_params, max_num_params, needed_params);

View File

@ -4180,6 +4180,7 @@ void StorageReplicatedMergeTree::startupImpl()
/// And this is just a callback
session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]()
{
LOG_TEST(log, "Received event for expired session. Waking up restarting thread");
restarting_thread.start();
});

View File

@ -24,7 +24,6 @@ NamesAndTypesList StorageSystemAsynchronousInserts::getNamesAndTypes()
{"table", std::make_shared<DataTypeString>()},
{"format", std::make_shared<DataTypeString>()},
{"first_update", std::make_shared<DataTypeDateTime64>(TIME_SCALE)},
{"last_update", std::make_shared<DataTypeDateTime64>(TIME_SCALE)},
{"total_bytes", std::make_shared<DataTypeUInt64>()},
{"entries.query_id", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"entries.bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
@ -77,7 +76,6 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co
res_columns[i++]->insert(insert_query.format);
res_columns[i++]->insert(time_in_microseconds(elem->data->first_update));
res_columns[i++]->insert(time_in_microseconds(elem->data->last_update));
res_columns[i++]->insert(elem->data->size);
Array arr_query_id;
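With the stale timeout removed, last_update disappears from the system table as well; what remains can be inspected as usual (a hedged example, using only the columns listed above):

SELECT first_update, total_bytes, `entries.query_id`
FROM system.asynchronous_inserts;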

View File

@ -0,0 +1,110 @@
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageValues.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionExplain.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/)
{
const auto * function = ast_function->as<ASTFunction>();
if (function && function->arguments && function->arguments->children.size() == 1)
{
const auto & query_arg = function->arguments->children[0];
if (!query_arg->as<ASTExplainQuery>())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Table function '{}' requires a explain query argument, got '{}'",
getName(), queryToString(query_arg));
query = query_arg;
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Table function '{}' cannot be called directly, use `SELECT * FROM (EXPLAIN ...)` syntax", getName());
}
}
ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context) const
{
Block sample_block = getInterpreter(context).getSampleBlock(query->as<ASTExplainQuery>()->getKind());
ColumnsDescription columns_description;
for (const auto & column : sample_block.getColumnsWithTypeAndName())
columns_description.add(ColumnDescription(column.name, column.type));
return columns_description;
}
static Block executeMonoBlock(QueryPipeline & pipeline)
{
if (!pipeline.pulling())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected pulling pipeline");
PullingPipelineExecutor pulling_executor(pipeline);
std::vector<Block> blocks;
while (true)
{
Block block;
if (pulling_executor.pull(block))
blocks.push_back(std::move(block));
else
break;
}
if (blocks.size() == 1)
return blocks[0];
return concatenateBlocks(blocks);
}
StoragePtr TableFunctionExplain::executeImpl(
const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
BlockIO blockio = getInterpreter(context).execute();
Block block = executeMonoBlock(blockio.pipeline);
StorageID storage_id(getDatabaseName(), table_name);
auto storage = std::make_shared<StorageValues>(storage_id, getActualTableStructure(context), std::move(block));
storage->startup();
return storage;
}
InterpreterExplainQuery TableFunctionExplain::getInterpreter(ContextPtr context) const
{
if (!query)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' requires a explain query argument", getName());
return InterpreterExplainQuery(query, context);
}
void registerTableFunctionExplain(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionExplain>({R"(
Returns the result of an EXPLAIN query.
The function should not be called directly but can be invoked via `SELECT * FROM (EXPLAIN <query>)`.
You can use this function to process the result of EXPLAIN further with SQL (e.g., in tests).
Example:
[example:1]
)",
{{"1", "SELECT explain FROM (EXPLAIN AST SELECT * FROM system.numbers) WHERE explain LIKE '%Asterisk%'"}}
});
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <TableFunctions/ITableFunction.h>
#include <Parsers/ASTExplainQuery.h>
#include <Interpreters/InterpreterExplainQuery.h>
#include <base/types.h>
namespace DB
{
class TableFunctionExplain : public ITableFunction
{
public:
static constexpr auto name = "viewExplain";
std::string getName() const override { return name; }
private:
StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override;
const char * getStorageTypeName() const override { return "Explain"; }
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
InterpreterExplainQuery getInterpreter(ContextPtr context) const;
ASTPtr query = nullptr;
};
}

View File

@ -59,6 +59,7 @@ void registerTableFunctions()
registerTableFunctionDictionary(factory);
registerTableFunctionFormat(factory);
registerTableFunctionExplain(factory);
}
}

View File

@ -58,6 +58,8 @@ void registerTableFunctionDictionary(TableFunctionFactory & factory);
void registerTableFunctionFormat(TableFunctionFactory & factory);
void registerTableFunctionExplain(TableFunctionFactory & factory);
void registerTableFunctions();
}

View File

@ -106,3 +106,6 @@ endif()
if (TARGET ch_contrib::jemalloc)
set(USE_JEMALLOC 1)
endif()
if (NOT ENABLE_EXTERNAL_OPENSSL)
set(USE_BORINGSSL 1)
endif ()

View File

@ -10,7 +10,7 @@
<!-- default credentials for Azurite storage account -->
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
<max_single_part_upload_size>33554432</max_single_part_upload_size>
<max_single_part_upload_size>100000</max_single_part_upload_size>
</blob_storage_disk>
<hdd>
<type>local</type>

View File

@ -4,10 +4,9 @@ import os
import pytest
pytestmark = pytest.mark.skip
from helpers.cluster import ClickHouseCluster
from helpers.utility import generate_values, replace_config, SafeThread
from azure.storage.blob import BlobServiceClient
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -573,8 +572,42 @@ def test_restart_during_load(cluster):
def test_big_insert(cluster):
node = cluster.instances[NODE_NAME]
create_table(node, TABLE_NAME)
check_query = "SELECT '2020-01-03', number, toString(number) FROM numbers(1000000)"
azure_query(
node,
f"INSERT INTO {TABLE_NAME} select '2020-01-03', number, toString(number) from numbers(5000000)",
f"INSERT INTO {TABLE_NAME} {check_query}",
)
assert int(azure_query(node, f"SELECT count() FROM {TABLE_NAME}")) == 5000000
assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query(
check_query
)
blob_container_client = cluster.blob_service_client.get_container_client(
CONTAINER_NAME
)
blobs = blob_container_client.list_blobs()
max_single_part_upload_size = 100000
checked = False
for blob in blobs:
blob_client = cluster.blob_service_client.get_blob_client(
CONTAINER_NAME, blob.name
)
committed, uncommitted = blob_client.get_block_list()
blocks = committed
last_id = len(blocks)
id = 1
if len(blocks) > 1:
checked = True
for block in blocks:
print(f"blob: {blob.name}, block size: {block.size}")
if id == last_id:
assert max_single_part_upload_size >= block.size
else:
assert max_single_part_upload_size == block.size
id += 1
assert checked

View File

@ -51,7 +51,7 @@ select * from enums order by e;
select * from enums order by e desc;
-- GROUP BY
select count(), e from enums group by e;
select count(), e from enums group by e order by e;
select any(e) from enums;
-- IN
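Most of the test edits in this commit follow one pattern: a bare GROUP BY guarantees no output row order, so an ORDER BY is appended to keep the .reference files deterministic. The change above is a representative pair:

-- Row order of a bare GROUP BY is unspecified and may differ between runs:
SELECT count(), e FROM enums GROUP BY e;
-- Pinning the order keeps the reference output stable:
SELECT count(), e FROM enums GROUP BY e ORDER BY e;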

View File

@ -11,4 +11,4 @@ ${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(1) UNION ALL SELECT * FROM nu
echo 'extremes'
${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(3)" --format PrettyCompactMonoBlock --extremes=1
echo 'totals'
${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS" --format PrettyCompactMonoBlock
${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS ORDER BY number%2" --format PrettyCompactMonoBlock

View File

@ -9,4 +9,5 @@ SELECT
number IN (1, 2) AS x,
count()
FROM numbers(10)
GROUP BY x;
GROUP BY x
ORDER BY x;

View File

@ -66,10 +66,10 @@ drop table if exists lc_null_fix_str_1;
select '-';
SELECT toLowCardinality('a') AS s, toTypeName(s), toTypeName(length(s)) from system.one;
select toLowCardinality('a') as val group by val;
select (toLowCardinality('a') as val) || 'b' group by val;
select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val;
select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val;
select toLowCardinality('a') as val group by val order by val;
select (toLowCardinality('a') as val) || 'b' group by val order by val;
select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val order by val;
select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val order by val;
select '-';
drop table if exists lc_str_uuid;

View File

@ -20,13 +20,14 @@ INSERT INTO bitmap_state_test SELECT
city_id,
groupBitmapState(uid) AS uv
FROM bitmap_test
GROUP BY pickup_date, city_id;
GROUP BY pickup_date, city_id
ORDER BY pickup_date, city_id;
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date;
SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date;
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date order by pickup_date;
SELECT
bitmapCardinality(day_today) AS today_users,
@ -37,11 +38,11 @@ SELECT
bitmapXorCardinality(day_today, day_before) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
) js2
USING city_id;
@ -54,11 +55,11 @@ SELECT
bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
) js2
USING city_id;
@ -68,7 +69,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapHasAny(bitmapBuild([uid]), (SELECT
SELECT count(*) FROM bitmap_test WHERE 0 = bitmapHasAny((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), bitmapBuild([uid]));
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id;
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([4294967296, 4294967297, 4294967298], 'Array(UInt64)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id;
DROP TABLE bitmap_state_test;
DROP TABLE bitmap_test;

View File

@ -20,7 +20,7 @@ INSERT INTO bitmap_test SELECT '2019-01-03', 2, number FROM numbers(1,10);
SELECT groupBitmap( uid ) AS user_num FROM bitmap_test;
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date;
SELECT pickup_date, groupBitmap( uid ) AS user_num, bitmapToArray(groupBitmapState( uid )) AS users FROM bitmap_test GROUP BY pickup_date ORDER BY pickup_date;
SELECT
bitmapCardinality(day_today) AS today_users,
@ -31,11 +31,11 @@ SELECT
bitmapXorCardinality(day_today, day_before) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
) js2
USING city_id;
@ -48,11 +48,11 @@ SELECT
bitmapCardinality(bitmapXor(day_today, day_before)) AS diff_users
FROM
(
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_today FROM bitmap_test WHERE pickup_date = '2019-01-02' GROUP BY city_id ORDER BY city_id
) js1
ALL LEFT JOIN
(
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id
SELECT city_id, groupBitmapState( uid ) AS day_before FROM bitmap_test WHERE pickup_date = '2019-01-01' GROUP BY city_id ORDER BY city_id
) js2
USING city_id;
@ -67,7 +67,7 @@ SELECT count(*) FROM bitmap_test WHERE bitmapContains((SELECT groupBitmapState(u
SELECT count(*) FROM bitmap_test WHERE 0 = bitmapContains((SELECT groupBitmapState(uid) FROM bitmap_test WHERE pickup_date = '2019-01-01'), uid);
-- PR#8082
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id;
SELECT bitmapToArray(bitmapAnd(groupBitmapState(uid), bitmapBuild(CAST([1, 2, 3], 'Array(UInt32)')))) FROM bitmap_test GROUP BY city_id ORDER BY city_id;
-- bitmap state test
DROP TABLE IF EXISTS bitmap_state_test;
@ -87,7 +87,7 @@ INSERT INTO bitmap_state_test SELECT
FROM bitmap_test
GROUP BY pickup_date, city_id;
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date;
SELECT pickup_date, groupBitmapMerge(uv) AS users from bitmap_state_test group by pickup_date order by pickup_date;
-- between column and expression test
DROP TABLE IF EXISTS bitmap_column_expr_test;

View File

@ -1,4 +1,4 @@
select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;
select k, groupArraySample(10, 123456)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k;
-- different seed
select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k;
select k, groupArraySample(10, 1)(v) from (select number % 4 as k, number as v from numbers(1024)) group by k order by k;

View File

@ -3,17 +3,17 @@ CREATE TABLE t (item_id UInt64, price_sold Float32, date Date) ENGINE MergeTree
SELECT item_id
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r
FULL JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r
USING (item_id);
SELECT id
FROM (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) l
FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS) r
FULL JOIN (SELECT item_id AS id FROM t GROUP BY id WITH TOTALS ORDER BY item_id) r
USING (id);
SELECT item_id
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) r
INNER JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) r
USING (item_id);
SELECT id
@ -26,75 +26,77 @@ FROM (
SELECT item_id AS id, SUM(price_sold) AS recent
FROM t WHERE (date BETWEEN '2019-12-16' AND '2020-03-08')
GROUP BY id WITH TOTALS
ORDER BY id
) ll
FULL JOIN
(
SELECT item_id AS id, SUM(price_sold) AS yago
FROM t WHERE (date BETWEEN '2018-12-17' AND '2019-03-10')
GROUP BY id WITH TOTALS
ORDER BY id
) rr
USING (id);
SELECT id, yago
FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
USING (id);
SELECT id, yago
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr
USING (id);
SELECT id, yago
FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ) AS ll
FROM ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, SUM(price_sold) AS yago FROM t GROUP BY id ) AS rr
USING (id);
SELECT id, yago
FROM ( SELECT item_id AS id FROM t GROUP BY id ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin(emptyArrayInt32()), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
USING (id);
SELECT id, yago
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ) AS rr
FROM ( SELECT item_id AS id, arrayJoin([111, 222, 333]) FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS ll
FULL OUTER JOIN ( SELECT item_id AS id, arrayJoin([111, 222, 333, 444]), SUM(price_sold) AS yago FROM t GROUP BY id WITH TOTALS ORDER BY id ) AS rr
USING (id);
INSERT INTO t VALUES (1, 100, '1970-01-01'), (1, 200, '1970-01-02');
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id FROM t ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
RIGHT JOIN (SELECT item_id FROM t ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT item_id FROM t) l
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT item_id FROM t) l
RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
RIGHT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS) l
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ) r
FROM (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS) l
LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ) r
FROM (SELECT item_id, 'foo' AS key, 1 AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id) l
LEFT JOIN (SELECT item_id, sum(price_sold) AS val FROM t GROUP BY item_id WITH TOTALS ORDER BY item_id ) r
ON l.item_id = r.item_id;
SELECT *
FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS) l
LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ) r
FROM (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date) l
LEFT JOIN (SELECT * FROM t GROUP BY item_id, price_sold, date WITH TOTALS ORDER BY item_id, price_sold, date ) r
ON l.item_id = r.item_id;
DROP TABLE t;

View File

@ -5,7 +5,7 @@ SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numb
SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM numbers(1000));
SELECT round(corrStable(x, y), 5) FROM (SELECT DISTINCT number % 10 AS x, number % 5 AS y FROM numbers(1000));
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x;
SELECT sum(DISTINCT y) FROM (SELECT number % 5 AS x, number % 15 AS y FROM numbers(1000)) GROUP BY x ORDER BY x;
SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);
EXPLAIN SYNTAX SELECT countIf(DISTINCT number % 10, number % 5 = 2) FROM numbers(10000);

View File

@ -87,3 +87,7 @@ aes-256-gcm 1
aes-256-gcm 1
aes-256-gcm 1
F56E87055BC32D0EEB31B2EACC2BF2A5 1
2022-09-02 00:00:00 2
2022-08-02 00:00:00 1 \N
2022-09-02 00:00:00 2 value2
2022-09-02 00:00:01 3 \N

View File

@ -129,4 +129,18 @@ SELECT
hex(decrypt('aes-256-gcm', concat(ciphertext, tag), key, iv, aad)) as plaintext_actual,
plaintext_actual = hex(plaintext);
-- tryDecrypt
CREATE TABLE decrypt_null (
dt DateTime,
user_id UInt32,
encrypted String,
iv String
) ENGINE = Memory;
INSERT INTO decrypt_null VALUES ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'), ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'), ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (decrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2'); --{serverError 454}
SELECT dt, user_id FROM decrypt_null WHERE (user_id > 0) AND (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) = 'value2');
SELECT dt, user_id, (tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv)) as value FROM decrypt_null ORDER BY user_id;
DROP TABLE encryption_test;
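A standalone sketch of the behavior the new test pins down (the key and IV values are illustrative; aes-256-gcm needs a 32-byte key, as in the test above):

-- decrypt() throws on a key/tag mismatch; tryDecrypt() returns NULL instead:
SELECT tryDecrypt('aes-256-gcm',
    encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'),
    'keykeykeykeykeykeykeykeykeykey02', 'iv1') AS v; -- v is NULL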

View File

@ -2,7 +2,7 @@ select sumResample(0, 20, 1)(number, number % 20) from numbers(200);
select arrayMap(x -> finalizeAggregation(x), state) from (select sumStateResample(0, 20, 1)(number, number % 20) as state from numbers(200));
select arrayMap(x -> finalizeAggregation(x), state) from
(
select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3
select sumStateResample(0,20,1)(number, number%20) as state from numbers(200) group by number % 3 order by number % 3
);
select groupArrayResample(0, 20, 1)(number, number % 20) from numbers(50);

View File

@ -1 +1 @@
SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k;
SELECT number % 100 AS k, sumArray(emptyArrayUInt8()) AS v FROM numbers(10) GROUP BY k ORDER BY k;

View File

@ -1,9 +1,9 @@
set output_format_write_statistics = 0;
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format Pretty;
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format Pretty;
select '--';
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format TSV;
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format TSV;
select '--';
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals) array join [1, 2] as a format JSON;
select g, s from (select g, sum(number) as s from numbers(4) group by bitAnd(number, 1) as g with totals order by g) array join [1, 2] as a format JSON;
select '--';

View File

@ -28,7 +28,7 @@ SELECT name, active FROM system.parts WHERE database = currentDatabase() AND tab
SELECT '# optimize';
SYSTEM START MERGES data_01660;
OPTIMIZE TABLE data_01660 FINAL;
SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state;
SELECT count(), _state FROM system.parts WHERE database = currentDatabase() AND table = 'data_01660' GROUP BY _state ORDER BY _state;
-- TRUNCATE does not remove parts instantly
SELECT '# truncate';

View File

@ -10,11 +10,11 @@ SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Asia/Istanbul'));
2020-01-01 00:00:00
SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Asia/Istanbul') FORMAT Null;
SELECT toTimeZone(toDateTime(-2., 2), 'Asia/Istanbul');
1970-01-01 02:00:00.00
1970-01-01 01:59:58.00
SELECT toDateTime64(-2., 2, 'Asia/Istanbul');
1970-01-01 02:00:00.00
1970-01-01 01:59:58.00
SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul');
2106-02-07 09:28:16.00
2242-03-16 15:56:32.00
SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Asia/Istanbul') FORMAT Null;
-- These are outside of the extended range and hence clamped
SELECT toDateTime64(-1 * bitShiftLeft(toUInt64(1), 35), 2, 'Asia/Istanbul');

View File

@ -1,3 +1,13 @@
2005-03-18 03:58:31.222
2005-03-18 03:58:31.222
2005-03-18 03:58:31.222
1970-01-01 00:00:00.000000000
1970-01-01 00:00:00.000000000
1900-04-15 00:53:20.000000000
1900-04-15 00:53:20.000000000
1900-01-01 00:00:00.000000000
1900-01-01 00:00:00.000000000
1900-01-01 00:00:00.000000000
1900-01-01 00:00:00.000000000
2261-07-15 11:33:20.000000000
2261-07-15 11:33:20.000000000

View File

@ -1,3 +1,22 @@
SELECT CAST(1111111111.222 AS DateTime64(3, 'Asia/Istanbul'));
SELECT toDateTime(1111111111.222, 3, 'Asia/Istanbul');
SELECT toDateTime64(1111111111.222, 3, 'Asia/Istanbul');
SELECT toDateTime64(0.0, 9, 'UTC') ;
SELECT toDateTime64(0, 9, 'UTC');
SELECT toDateTime64(-2200000000.0, 9, 'UTC'); -- 1900-01-01 < value
SELECT toDateTime64(-2200000000, 9, 'UTC');
SELECT toDateTime64(-2300000000.0, 9, 'UTC'); -- value < 1900-01-01
SELECT toDateTime64(-2300000000, 9, 'UTC');
SELECT toDateTime64(-999999999999.0, 9, 'UTC'); -- value << 1900-01-01
SELECT toDateTime64(-999999999999, 9, 'UTC');
SELECT toDateTime64(9200000000.0, 9, 'UTC'); -- value < 2262-04-11
SELECT toDateTime64(9200000000, 9, 'UTC');
SELECT toDateTime64(9300000000.0, 9, 'UTC'); -- { serverError 407 } # 2262-04-11 < value
SELECT toDateTime64(9300000000, 9, 'UTC'); -- { serverError 407 }

View File

@ -124,14 +124,14 @@ SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id;
192 0 [] [0,1]
SELECT sum(u) FROM t_sparse;
1900
SELECT sum(u) FROM t_sparse GROUP BY id % 7;
210
360
300
240
190
330
270
SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7;
0 210
1 360
2 300
3 240
4 190
5 330
6 270
SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5;
[1]
[1,3]

View File

@ -25,7 +25,7 @@ SELECT * FROM t_sparse WHERE arr1 != [] ORDER BY id;
SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id;
SELECT sum(u) FROM t_sparse;
SELECT sum(u) FROM t_sparse GROUP BY id % 7;
SELECT id % 7, sum(u) FROM t_sparse GROUP BY id % 7 ORDER BY id % 7;
SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5;

View File

@ -95,4 +95,5 @@ FROM
FROM numbers(10)
)
GROUP BY b
ORDER BY b ASC
1 1 1

View File

@ -58,8 +58,8 @@ SELECT quantileBFloat16Weighted(0.2)(d, 1), quantileBFloat16Weighted(0.3)(d, 1),
EXPLAIN SYNTAX SELECT quantile(0.2)(d) as k, quantile(0.3)(d) FROM datetime order by quantile(0.2)(d);
SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b;
EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b;
SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b;
EXPLAIN SYNTAX SELECT b, quantile(0.5)(x) as a, quantile(0.9)(x) as y, quantile(0.95)(x) FROM (select number as x, number % 2 as b from numbers(10)) group by b order by b;
-- fuzzer
SELECT quantileDeterministic(0.99)(1023) FROM datetime FORMAT Null; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }

View File

@ -12,7 +12,6 @@ CREATE TABLE system.asynchronous_inserts
`table` String,
`format` String,
`first_update` DateTime64(6),
`last_update` DateTime64(6),
`total_bytes` UInt64,
`entries.query_id` Array(String),
`entries.bytes` Array(UInt64),

View File

@ -0,0 +1 @@
SELECT round(rankCorr(number, -number)) FROM numbers(5000000);

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race, zookeeper, no-parallel
# Tags: race, zookeeper, no-parallel, disabled
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

Some files were not shown because too many files have changed in this diff.