Merge remote-tracking branch 'blessed/master' into perf_experiment

This commit is contained in:
Raúl Marín 2022-10-17 12:07:31 +02:00
commit be932f49d2
90 changed files with 2145 additions and 757 deletions

2
contrib/cctz vendored

@ -1 +1 @@
Subproject commit 49c656c62fbd36a1bc20d64c476853bdb7cf7bb9
Subproject commit 7a454c25c7d16053bcd327cdd16329212a08fa4a

View File

@ -1020,6 +1020,62 @@ Example:
}
```
To use object name as column value you can use special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). Value of this setting is set to the name of a column, that is used as JSON key for a row in resulting object.
Examples:
For output:
Let's say we have table `test` with two columns:
```
┌─object_name─┬─number─┐
│ first_obj │ 1 │
│ second_obj │ 2 │
│ third_obj │ 3 │
└─────────────┴────────┘
```
Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting:
```sql
select * from test settings format_json_object_each_row_column_for_object_name='object_name'
```
The output:
```json
{
"first_obj": {"number": 1},
"second_obj": {"number": 2},
"third_obj": {"number": 3}
}
```
For input:
Let's say we stored output from previous example in a file with name `data.json`:
```sql
select * from file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') settings format_json_object_each_row_column_for_object_name='object_name'
```
```
┌─object_name─┬─number─┐
│ first_obj │ 1 │
│ second_obj │ 2 │
│ third_obj │ 3 │
└─────────────┴────────┘
```
It also works in schema inference:
```sql
desc file('data.json', JSONObjectEachRow) settings format_json_object_each_row_column_for_object_name='object_name'
```
```
┌─name────────┬─type────────────┐
│ object_name │ String │
│ number │ Nullable(Int64) │
└─────────────┴─────────────────┘
```
### Inserting Data {#json-inserting-data}

View File

@ -41,6 +41,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnt don
- [node-clickhouse](https://github.com/apla/node-clickhouse)
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)

View File

@ -3902,6 +3902,13 @@ Controls validation of UTF-8 sequences in JSON output formats, doesn't impact fo
Disabled by default.
### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name}
The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md#jsonobjecteachrow) format.
Column type should be String. If value is empty, default names `row_{i}`will be used for object names.
Default value: ''.
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}

View File

@ -34,6 +34,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [node-clickhouse](https://github.com/apla/node-clickhouse)
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)

View File

@ -1,10 +1,450 @@
---
slug: /zh/getting-started/example-datasets/uk-price-paid
sidebar_label: UK Property Price Paid
sidebar_label: 英国房地产支付价格
sidebar_position: 1
title: "UK Property Price Paid"
title: "英国房地产支付价格"
---
import Content from '@site/docs/en/getting-started/example-datasets/uk-price-paid.md';
该数据集包含自 1995 年以来有关英格兰和威尔士房地产价格的数据。未压缩的大小约为 4 GiB在 ClickHouse 中大约需要 278 MiB。
<Content />
来源https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
字段说明https://www.gov.uk/guidance/about-the-price-data
包含 HM Land Registry data © Crown copyright and database right 2021.。此数据集需在 Open Government License v3.0 的许可下使用。
## 创建表 {#create-table}
```sql
CREATE TABLE uk_price_paid
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
is_new UInt8,
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2);
```
## 预处理和插入数据 {#preprocess-import-data}
我们将使用 `url` 函数将数据流式传输到 ClickHouse。我们需要首先预处理一些传入的数据其中包括
- 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询
- 将`time` 字段转换为日期为它只包含 00:00 时间
- 忽略 [UUid](../../sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析
- 使用 [transform](../../sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段
- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1
- 删除最后两列,因为它们都具有相同的值(即 0
`url` 函数将来自网络服务器的数据流式传输到 ClickHouse 表中。以下命令将 500 万行插入到 `uk_price_paid` 表中:
```sql
INSERT INTO uk_price_paid
WITH
splitByChar(' ', postcode) AS p
SELECT
toUInt32(price_string) AS price,
parseDateTimeBestEffortUS(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county
FROM url(
'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
'CSV',
'uuid_string String,
price_string String,
time String,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String'
) SETTINGS max_http_get_redirects=10;
```
需要等待一两分钟以便数据插入,具体时间取决于网络速度。
## 验证数据 {#validate-data}
让我们通过查看插入了多少行来验证它是否有效:
```sql
SELECT count()
FROM uk_price_paid
```
在执行此查询时,数据集有 27,450,499 行。让我们看看 ClickHouse 中表的大小是多少:
```sql
SELECT formatReadableSize(total_bytes)
FROM system.tables
WHERE name = 'uk_price_paid'
```
请注意,表的大小仅为 221.43 MiB
## 运行一些查询 {#run-queries}
让我们运行一些查询来分析数据:
### 查询 1. 每年平均价格 {#average-price}
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 1000000, 80
)
FROM uk_price_paid
GROUP BY year
ORDER BY year
```
结果如下所示:
```response
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67934 │ █████▍ │
│ 1996 │ 71508 │ █████▋ │
│ 1997 │ 78536 │ ██████▎ │
│ 1998 │ 85441 │ ██████▋ │
│ 1999 │ 96038 │ ███████▋ │
│ 2000 │ 107487 │ ████████▌ │
│ 2001 │ 118888 │ █████████▌ │
│ 2002 │ 137948 │ ███████████ │
│ 2003 │ 155893 │ ████████████▍ │
│ 2004 │ 178888 │ ██████████████▎ │
│ 2005 │ 189359 │ ███████████████▏ │
│ 2006 │ 203532 │ ████████████████▎ │
│ 2007 │ 219375 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
│ 2012 │ 238381 │ ███████████████████ │
│ 2013 │ 256927 │ ████████████████████▌ │
│ 2014 │ 280008 │ ██████████████████████▍ │
│ 2015 │ 297263 │ ███████████████████████▋ │
│ 2016 │ 313518 │ █████████████████████████ │
│ 2017 │ 346371 │ ███████████████████████████▋ │
│ 2018 │ 350556 │ ████████████████████████████ │
│ 2019 │ 352184 │ ████████████████████████████▏ │
│ 2020 │ 375808 │ ██████████████████████████████ │
│ 2021 │ 381105 │ ██████████████████████████████▍ │
│ 2022 │ 362572 │ █████████████████████████████ │
└──────┴────────┴────────────────────────────────────────┘
```
### 查询 2. 伦敦每年的平均价格 {#average-price-london}
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 2000000, 100
)
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year
```
结果如下所示:
```response
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109110 │ █████▍ │
│ 1996 │ 118659 │ █████▊ │
│ 1997 │ 136526 │ ██████▋ │
│ 1998 │ 153002 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215849 │ ██████████▋ │
│ 2001 │ 232987 │ ███████████▋ │
│ 2002 │ 263668 │ █████████████▏ │
│ 2003 │ 278424 │ █████████████▊ │
│ 2004 │ 304664 │ ███████████████▏ │
│ 2005 │ 322887 │ ████████████████▏ │
│ 2006 │ 356195 │ █████████████████▋ │
│ 2007 │ 404062 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2010 │ 480322 │ ████████████████████████ │
│ 2011 │ 496278 │ ████████████████████████▋ │
│ 2012 │ 519482 │ █████████████████████████▊ │
│ 2013 │ 616195 │ ██████████████████████████████▋ │
│ 2014 │ 724121 │ ████████████████████████████████████▏ │
│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
2020 年房价出事了!但这并不令人意外……
### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods}
```sql
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE date >= '2020-01-01'
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
```
结果如下所示:
```response
┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
```
## 使用 Projection 加速查询 {#speedup-with-projections}
[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection何时使用由 ClickHouse 决定)。
### 构建投影{#build-projection}
让我们通过维度 `toYear(date)`、`district` 和 `town` 创建一个聚合 Projection
```sql
ALTER TABLE uk_price_paid
ADD PROJECTION projection_by_year_district_town
(
SELECT
toYear(date),
district,
town,
avg(price),
sum(price),
count()
GROUP BY
toYear(date),
district,
town
)
```
填充现有数据的 Projection。 (如果不进行 materialize 操作,则 ClickHouse 只会为新插入的数据创建 Projection
```sql
ALTER TABLE uk_price_paid
MATERIALIZE PROJECTION projection_by_year_district_town
SETTINGS mutations_sync = 1
```
## Test Performance {#test-performance}
让我们再次运行相同的 3 个查询:
### 查询 1. 每年平均价格 {#average-price-projections}
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 1000000, 80)
FROM uk_price_paid
GROUP BY year
ORDER BY year ASC
```
结果是一样的,但是性能更好!
```response
No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
```
### 查询 2. 伦敦每年的平均价格 {#average-price-london-projections}
```sql
SELECT
toYear(date) AS year,
round(avg(price)) AS price,
bar(price, 0, 2000000, 100)
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
ORDER BY year ASC
```
Same result, but notice the improvement in query performance:
```response
No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
```
### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods-projections}
注意:需要修改 (date >= '2020-01-01') 以使其与 Projection 定义的维度 (`toYear(date) >= 2020)` 匹配:
```sql
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE toYear(date) >= 2020
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
```
同样,结果是相同的,但请注意查询性能的改进:
```response
No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
```
### 在 Playground 上测试{#playground}
也可以在 [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==) 上找到此数据集。

View File

@ -35,6 +35,9 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- NodeJs
- [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
- [node-clickhouse](https://github.com/apla/node-clickhouse)
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
- [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)

View File

@ -58,7 +58,7 @@ void DisksApp::addOptions(
("disk", po::value<String>(), "Set disk name")
("command_name", po::value<String>(), "Name for command to do")
("send-logs", "Send logs")
("log-level", "Logging level")
("log-level", po::value<String>(), "Logging level")
;
positional_options_description.add("command_name", 1);

View File

@ -13,7 +13,6 @@
#include <Interpreters/DatabaseCatalog.h>
#include <base/getFQDNOrHostName.h>
#include <Common/scope_guard_safe.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/Session.h>
#include <Access/AccessControl.h>
#include <Common/Exception.h>
@ -32,6 +31,7 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTInsertQuery.h>
#include <Common/ErrorHandlers.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
@ -602,8 +602,6 @@ void LocalServer::processConfig()
global_context->setCurrentDatabase(default_database);
applyCmdOptions(global_context);
bool enable_objects_loader = false;
if (config().has("path"))
{
String path = global_context->getPath();
@ -611,12 +609,6 @@ void LocalServer::processConfig()
/// Lock path directory before read
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading user defined objects from {}", path);
Poco::File(path + "user_defined/").createDirectories();
UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
enable_objects_loader = true;
LOG_DEBUG(log, "Loaded user defined objects.");
LOG_DEBUG(log, "Loading metadata from {}", path);
loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
@ -630,6 +622,9 @@ void LocalServer::processConfig()
DatabaseCatalog::instance().loadDatabases();
}
/// For ClickHouse local if path is not set the loader will be disabled.
global_context->getUserDefinedSQLObjectsLoader().loadObjects();
LOG_DEBUG(log, "Loaded metadata.");
}
else if (!config().has("no-system-tables"))
@ -639,9 +634,6 @@ void LocalServer::processConfig()
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}
/// Persist SQL user defined objects only if user_defined folder was created
UserDefinedSQLObjectsLoader::instance().enable(enable_objects_loader);
server_display_name = config().getString("display_name", getFQDNOrHostName());
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
std::map<String, String> prompt_substitutions{{"display_name", server_display_name}};

View File

@ -53,7 +53,6 @@
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControl.h>
#include <Storages/StorageReplicatedMergeTree.h>
@ -62,6 +61,7 @@
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/registerFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Formats/registerFormats.h>
@ -1010,12 +1010,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
fs::create_directories(user_scripts_path);
}
{
std::string user_defined_path = config().getString("user_defined_path", path / "user_defined/");
global_context->setUserDefinedPath(user_defined_path);
fs::create_directories(user_defined_path);
}
/// top_level_domains_lists
{
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path / "top_level_domains/");
@ -1559,18 +1553,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading user defined objects from {}", path_str);
try
{
UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
}
catch (...)
{
tryLogCurrentException(log, "Caught exception while loading user defined objects");
throw;
}
LOG_DEBUG(log, "Loaded user defined objects");
LOG_INFO(log, "Loading metadata from {}", path_str);
try
@ -1598,6 +1580,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
database_catalog.loadDatabases();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);
/// Load user-defined SQL functions.
global_context->getUserDefinedSQLObjectsLoader().loadObjects();
}
catch (...)
{

View File

@ -284,6 +284,7 @@ add_object_library(clickhouse_processors_ttl Processors/TTL)
add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations)
add_object_library(clickhouse_user_defined_functions Functions/UserDefined)
if (TARGET ch_contrib::nuraft)
add_object_library(clickhouse_coordination Coordination)

View File

@ -236,6 +236,21 @@ ASTPtr QueryFuzzer::getRandomColumnLike()
return new_ast;
}
ASTPtr QueryFuzzer::getRandomExpressionList()
{
if (column_like.empty())
{
return nullptr;
}
ASTPtr new_ast = std::make_shared<ASTExpressionList>();
for (size_t i = 0; i < fuzz_rand() % 5 + 1; ++i)
{
new_ast->children.push_back(getRandomColumnLike());
}
return new_ast;
}
void QueryFuzzer::replaceWithColumnLike(ASTPtr & ast)
{
if (column_like.empty())
@ -841,7 +856,52 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
else if (auto * select = typeid_cast<ASTSelectQuery *>(ast.get()))
{
fuzzColumnLikeExpressionList(select->select().get());
fuzzColumnLikeExpressionList(select->groupBy().get());
if (select->groupBy().get())
{
if (fuzz_rand() % 50 == 0)
{
select->groupBy()->children.clear();
select->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
select->group_by_with_grouping_sets = false;
select->group_by_with_rollup = false;
select->group_by_with_cube = false;
select->group_by_with_totals = true;
}
else if (fuzz_rand() % 100 == 0)
{
select->group_by_with_grouping_sets = !select->group_by_with_grouping_sets;
}
else if (fuzz_rand() % 100 == 0)
{
select->group_by_with_rollup = !select->group_by_with_rollup;
}
else if (fuzz_rand() % 100 == 0)
{
select->group_by_with_cube = !select->group_by_with_cube;
}
else if (fuzz_rand() % 100 == 0)
{
select->group_by_with_totals = !select->group_by_with_totals;
}
}
else if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::GROUP_BY, getRandomExpressionList());
}
if (select->where().get())
{
if (fuzz_rand() % 50 == 0)
{
select->where()->children.clear();
select->setExpression(ASTSelectQuery::Expression::WHERE, {});
}
}
else if (fuzz_rand() % 50 == 0)
{
select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike());
}
fuzzOrderByList(select->orderBy().get());
fuzz(select->children);

View File

@ -8,6 +8,7 @@
#include <pcg-random/pcg_random.hpp>
#include <Common/randomSeed.h>
#include "Parsers/IAST_fwd.h"
#include <Core/Field.h>
#include <Parsers/IAST.h>
@ -72,6 +73,7 @@ struct QueryFuzzer
Field getRandomField(int type);
Field fuzzField(Field field);
ASTPtr getRandomColumnLike();
ASTPtr getRandomExpressionList();
DataTypePtr fuzzDataType(DataTypePtr type);
DataTypePtr getRandomType();
ASTs getInsertQueriesForFuzzedTables(const String & full_query);

View File

@ -99,7 +99,7 @@ void ZooKeeper::init(ZooKeeperArgs args_)
if (dns_error)
throw KeeperException("Cannot resolve any of provided ZooKeeper hosts due to DNS error", Coordination::Error::ZCONNECTIONLOSS);
else
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZCONNECTIONLOSS);
}
impl = std::make_unique<Coordination::ZooKeeper>(nodes, args, zk_log);

View File

@ -306,9 +306,21 @@ void reverseTransposeBytes(const UInt64 * matrix, UInt32 col, T & value)
template <typename T>
void load(const char * src, T * buf, UInt32 tail = 64)
{
if constexpr (std::endian::native == std::endian::little)
{
memcpy(buf, src, tail * sizeof(T));
}
else
{
/// Since the algorithm uses little-endian integers, data is loaded
/// as little-endian types on big-endian machine (s390x, etc).
for (UInt32 i = 0; i < tail; ++i)
{
buf[i] = unalignedLoadLE<T>(src + i * sizeof(T));
}
}
}
template <typename T>
void store(const T * buf, char * dst, UInt32 tail = 64)

View File

@ -776,6 +776,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
M(Bool, output_format_json_validate_utf8, false, "Validate UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8", 0) \
\
M(String, format_json_object_each_row_column_for_object_name, "", "The name of column that will be used as object names in JSONObjectEachRow format. Column type should be String", 0) \
\
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \

View File

@ -1,6 +1,7 @@
#include <Databases/DDLDependencyVisitor.h>
#include <Dictionaries/getDictionaryConfigurationFromAST.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
@ -11,6 +12,8 @@
namespace DB
{
using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor;
TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast)
{
assert(global_context == global_context->getGlobalContext());
@ -35,7 +38,7 @@ void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data)
visit(*storage, data);
}
bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & child)
bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
if (node->as<ASTStorage>())
return false;
@ -49,20 +52,26 @@ bool DDLDependencyVisitor::needChildVisit(const ASTPtr & node, const ASTPtr & ch
return true;
}
void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data)
ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function)
{
if (function.name == "joinGet" ||
function.name == "dictHas" ||
function.name == "dictIsIn" ||
function.name.starts_with("dictGet"))
{
extractTableNameFromArgument(function, data, 0);
}
else if (Poco::toLower(function.name) == "in")
{
extractTableNameFromArgument(function, data, 1);
return 0;
if (Poco::toLower(function.name) == "in")
return 1;
return -1;
}
void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data)
{
ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
if (table_name_arg_idx < 0)
return;
extractTableNameFromArgument(function, data, table_name_arg_idx);
}
void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data)
@ -140,4 +149,50 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func
data.dependencies.emplace(std::move(qualified_name));
}
void NormalizeAndEvaluateConstants::visit(const ASTPtr & ast, Data & data)
{
assert(data.create_query_context->hasQueryContext());
/// Looking for functions in column default expressions and dictionary source definition
if (const auto * function = ast->as<ASTFunction>())
visit(*function, data);
else if (const auto * dict_source = ast->as<ASTFunctionWithKeyValueArguments>())
visit(*dict_source, data);
}
void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & data)
{
/// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))"
/// with "dictGet('db_name.dict', 'value', toUInt32(1))"
ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
if (table_name_arg_idx < 0)
return;
if (!function.arguments || function.arguments->children.size() <= static_cast<size_t>(table_name_arg_idx))
return;
auto & arg = function.arguments->as<ASTExpressionList &>().children[table_name_arg_idx];
if (arg->as<ASTFunction>())
arg = evaluateConstantExpressionAsLiteral(arg, data.create_query_context);
}
void NormalizeAndEvaluateConstants::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data)
{
if (!dict_source.elements)
return;
auto & expr_list = dict_source.elements->as<ASTExpressionList &>();
for (auto & child : expr_list.children)
{
ASTPair * pair = child->as<ASTPair>();
if (pair->second->as<ASTFunction>())
{
auto ast_literal = evaluateConstantExpressionAsLiteral(pair->children[0], data.create_query_context);
pair->replace(pair->second, ast_literal);
}
}
}
}

View File

@ -14,11 +14,19 @@ using TableNamesSet = std::unordered_set<QualifiedTableName>;
TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast);
class DDLMatcherBase
{
public:
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
static ssize_t getPositionOfTableNameArgument(const ASTFunction & function);
};
/// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies
/// from column default expressions (joinGet, dictGet, etc)
/// or dictionary source (for dictionaries from local ClickHouse table).
/// Does not validate AST, works a best-effort way.
class DDLDependencyVisitor
class DDLDependencyVisitor : public DDLMatcherBase
{
public:
struct Data
@ -32,7 +40,6 @@ public:
using Visitor = ConstInDepthNodeVisitor<DDLDependencyVisitor, true>;
static void visit(const ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(const ASTFunction & function, Data & data);
@ -42,6 +49,24 @@ private:
static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx);
};
using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor;
class NormalizeAndEvaluateConstants : public DDLMatcherBase
{
public:
struct Data
{
ContextPtr create_query_context;
};
using Visitor = ConstInDepthNodeVisitor<NormalizeAndEvaluateConstants, true>;
static void visit(const ASTPtr & ast, Data & data);
private:
static void visit(const ASTFunction & function, Data & data);
static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data);
};
using NormalizeAndEvaluateConstantsVisitor = NormalizeAndEvaluateConstants::Visitor;
}

View File

@ -452,6 +452,11 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
}
else if (const auto * func = pair->second->as<ASTFunction>())
{
/// This branch exists only for compatibility.
/// It's not possible to have a function in a dictionary definition since 22.10,
/// because query must be normalized on dictionary creation. It's possible only when we load old metadata.
/// For debug builds allow it only during server startup to avoid crash in BC check in Stress Tests.
assert(!Context::getGlobalContextInstance()->isServerCompletelyStarted());
auto builder = FunctionFactory::instance().tryGet(func->name, context);
auto function = builder->build({});
function->prepare({});

View File

@ -125,7 +125,7 @@ std::unique_ptr<Aws::S3::S3Client> getClient(const Poco::Util::AbstractConfigura
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;

View File

@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings;
format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata;
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name;
format_settings.json.try_infer_objects = context->getSettingsRef().allow_experimental_object_type;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;

View File

@ -156,6 +156,11 @@ struct FormatSettings
bool try_infer_objects = false;
} json;
struct
{
String column_for_object_name;
} json_object_each_row;
struct
{
UInt64 row_group_size = 1000000;

View File

@ -659,8 +659,8 @@ class FunctionBinaryArithmetic : public IFunction
static FunctionOverloadResolverPtr
getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context)
{
bool first_is_date_or_datetime = isDate(type0) || isDateTime(type0) || isDateTime64(type0);
bool second_is_date_or_datetime = isDate(type1) || isDateTime(type1) || isDateTime64(type1);
bool first_is_date_or_datetime = isDateOrDate32(type0) || isDateTime(type0) || isDateTime64(type0);
bool second_is_date_or_datetime = isDateOrDate32(type1) || isDateTime(type1) || isDateTime64(type1);
/// Exactly one argument must be Date or DateTime
if (first_is_date_or_datetime == second_is_date_or_datetime)
@ -699,7 +699,7 @@ class FunctionBinaryArithmetic : public IFunction
}
else
{
if (isDate(type_time))
if (isDateOrDate32(type_time))
function_name = is_plus ? "addDays" : "subtractDays";
else
function_name = is_plus ? "addSeconds" : "subtractSeconds";
@ -895,7 +895,7 @@ class FunctionBinaryArithmetic : public IFunction
ColumnsWithTypeAndName new_arguments = arguments;
/// Interval argument must be second.
if (isDate(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type))
if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type))
std::swap(new_arguments[0], new_arguments[1]);
/// Change interval argument type to its representation
@ -1099,7 +1099,7 @@ public:
new_arguments[i].type = arguments[i];
/// Interval argument must be second.
if (isDate(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type))
if (isDateOrDate32(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type))
std::swap(new_arguments[0], new_arguments[1]);
/// Change interval argument to its representation

View File

@ -5,8 +5,8 @@
#include <DataTypes/DataTypeFactory.h>
#include <Interpreters/UserDefinedExecutableFunction.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunction.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>

View File

@ -4,7 +4,7 @@
#include <Interpreters/Context_fwd.h>
#include <Interpreters/ExternalLoader.h>
#include <Interpreters/UserDefinedExecutableFunction.h>
#include <Functions/UserDefined/UserDefinedExecutableFunction.h>
namespace DB
{

View File

@ -0,0 +1,47 @@
#pragma once
#include <base/types.h>
namespace DB
{
class IAST;
struct Settings;
enum class UserDefinedSQLObjectType;
/// Interface for a loader of user-defined SQL objects.
/// Implementations: UserDefinedSQLLoaderFromDisk, UserDefinedSQLLoaderFromZooKeeper
class IUserDefinedSQLObjectsLoader
{
public:
virtual ~IUserDefinedSQLObjectsLoader() = default;
/// Whether this loader can replicate SQL objects to another node.
virtual bool isReplicated() const { return false; }
virtual String getReplicationID() const { return ""; }
/// Loads all objects. Can be called once - if objects are already loaded the function does nothing.
virtual void loadObjects() = 0;
/// Stops watching.
virtual void stopWatching() {}
/// Immediately reloads all objects, throws an exception if failed.
virtual void reloadObjects() = 0;
/// Immediately reloads a specified object only.
virtual void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) = 0;
/// Stores an object (must be called only by UserDefinedSQLFunctionFactory::registerFunction).
virtual bool storeObject(
UserDefinedSQLObjectType object_type,
const String & object_name,
const IAST & create_object_query,
bool throw_if_exists,
bool replace_if_exists,
const Settings & settings) = 0;
/// Removes an object (must be called only by UserDefinedSQLFunctionFactory::unregisterFunction).
virtual bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) = 0;
};
}

View File

@ -12,9 +12,9 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Interpreters/Context.h>
#include <Interpreters/castColumn.h>

View File

@ -0,0 +1,301 @@
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Backups/RestorerFromBackup.h>
#include <Functions/FunctionFactory.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Functions/UserDefined/UserDefinedSQLObjectsBackup.h>
#include <Interpreters/Context.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/quoteString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FUNCTION_ALREADY_EXISTS;
extern const int UNKNOWN_FUNCTION;
extern const int CANNOT_DROP_FUNCTION;
extern const int CANNOT_CREATE_RECURSIVE_FUNCTION;
extern const int UNSUPPORTED_METHOD;
}
namespace
{
void validateFunctionRecursiveness(const IAST & node, const String & function_to_create)
{
for (const auto & child : node.children)
{
auto function_name_opt = tryGetFunctionName(child);
if (function_name_opt && function_name_opt.value() == function_to_create)
throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function");
validateFunctionRecursiveness(*child, function_to_create);
}
}
void validateFunction(ASTPtr function, const String & name)
{
ASTFunction * lambda_function = function->as<ASTFunction>();
if (!lambda_function)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage());
auto & lambda_function_expression_list = lambda_function->arguments->children;
if (lambda_function_expression_list.size() != 2)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body");
const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as<ASTFunction>();
if (!tuple_function_arguments || !tuple_function_arguments->arguments)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments");
std::unordered_set<String> arguments;
for (const auto & argument : tuple_function_arguments->arguments->children)
{
const auto * argument_identifier = argument->as<ASTIdentifier>();
if (!argument_identifier)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier");
const auto & argument_name = argument_identifier->name();
auto [_, inserted] = arguments.insert(argument_name);
if (!inserted)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name);
}
ASTPtr function_body = lambda_function_expression_list[1];
if (!function_body)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body");
validateFunctionRecursiveness(*function_body, name);
}
ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query)
{
auto ptr = create_function_query.clone();
auto & res = typeid_cast<ASTCreateFunctionQuery &>(*ptr);
res.if_not_exists = false;
res.or_replace = false;
FunctionNameNormalizer().visit(res.function_core.get());
return ptr;
}
}
UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance()
{
static UserDefinedSQLFunctionFactory result;
return result;
}
void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query)
{
if (FunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name);
if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name);
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name);
validateFunction(assert_cast<const ASTCreateFunctionQuery &>(create_function_query).function_core, function_name);
}
void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & context, const String & function_name)
{
if (FunctionFactory::instance().hasNameOrAlias(function_name) ||
AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name);
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name);
}
bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists)
{
checkCanBeRegistered(context, function_name, *create_function_query);
create_function_query = normalizeCreateFunctionQuery(*create_function_query);
std::lock_guard lock{mutex};
auto it = function_name_to_create_query_map.find(function_name);
if (it != function_name_to_create_query_map.end())
{
if (throw_if_exists)
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", function_name);
else if (!replace_if_exists)
return false;
}
try
{
auto & loader = context->getUserDefinedSQLObjectsLoader();
bool stored = loader.storeObject(UserDefinedSQLObjectType::Function, function_name, *create_function_query, throw_if_exists, replace_if_exists, context->getSettingsRef());
if (!stored)
return false;
}
catch (Exception & exception)
{
exception.addMessage(fmt::format("while storing user defined function {}", backQuote(function_name)));
throw;
}
function_name_to_create_query_map[function_name] = create_function_query;
return true;
}
bool UserDefinedSQLFunctionFactory::unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists)
{
checkCanBeUnregistered(context, function_name);
std::lock_guard lock(mutex);
auto it = function_name_to_create_query_map.find(function_name);
if (it == function_name_to_create_query_map.end())
{
if (throw_if_not_exists)
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", function_name);
else
return false;
}
try
{
auto & loader = context->getUserDefinedSQLObjectsLoader();
bool removed = loader.removeObject(UserDefinedSQLObjectType::Function, function_name, throw_if_not_exists);
if (!removed)
return false;
}
catch (Exception & exception)
{
exception.addMessage(fmt::format("while removing user defined function {}", backQuote(function_name)));
throw;
}
function_name_to_create_query_map.erase(function_name);
return true;
}
ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const
{
std::lock_guard lock(mutex);
auto it = function_name_to_create_query_map.find(function_name);
if (it == function_name_to_create_query_map.end())
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
"The function name '{}' is not registered",
function_name);
return it->second;
}
ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const
{
std::lock_guard lock(mutex);
auto it = function_name_to_create_query_map.find(function_name);
if (it == function_name_to_create_query_map.end())
return nullptr;
return it->second;
}
bool UserDefinedSQLFunctionFactory::has(const String & function_name) const
{
return tryGet(function_name) != nullptr;
}
std::vector<std::string> UserDefinedSQLFunctionFactory::getAllRegisteredNames() const
{
std::vector<std::string> registered_names;
std::lock_guard lock(mutex);
registered_names.reserve(function_name_to_create_query_map.size());
for (const auto & [name, _] : function_name_to_create_query_map)
registered_names.emplace_back(name);
return registered_names;
}
bool UserDefinedSQLFunctionFactory::empty() const
{
std::lock_guard lock(mutex);
return function_name_to_create_query_map.empty();
}
void UserDefinedSQLFunctionFactory::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const
{
backupUserDefinedSQLObjects(backup_entries_collector, data_path_in_backup, UserDefinedSQLObjectType::Function, getAllFunctions());
}
void UserDefinedSQLFunctionFactory::restore(RestorerFromBackup & restorer, const String & data_path_in_backup)
{
auto restored_functions = restoreUserDefinedSQLObjects(restorer, data_path_in_backup, UserDefinedSQLObjectType::Function);
const auto & restore_settings = restorer.getRestoreSettings();
bool throw_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreate);
bool replace_if_exists = (restore_settings.create_function == RestoreUDFCreationMode::kReplace);
auto context = restorer.getContext();
for (const auto & [function_name, create_function_query] : restored_functions)
registerFunction(context, function_name, create_function_query, throw_if_exists, replace_if_exists);
}
void UserDefinedSQLFunctionFactory::setAllFunctions(const std::vector<std::pair<String, ASTPtr>> & new_functions)
{
std::unordered_map<String, ASTPtr> normalized_functions;
for (const auto & [function_name, create_query] : new_functions)
normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query);
std::lock_guard lock(mutex);
function_name_to_create_query_map = std::move(normalized_functions);
}
std::vector<std::pair<String, ASTPtr>> UserDefinedSQLFunctionFactory::getAllFunctions() const
{
std::lock_guard lock{mutex};
std::vector<std::pair<String, ASTPtr>> all_functions;
all_functions.reserve(function_name_to_create_query_map.size());
std::copy(function_name_to_create_query_map.begin(), function_name_to_create_query_map.end(), std::back_inserter(all_functions));
return all_functions;
}
void UserDefinedSQLFunctionFactory::setFunction(const String & function_name, const IAST & create_function_query)
{
std::lock_guard lock(mutex);
function_name_to_create_query_map[function_name] = normalizeCreateFunctionQuery(create_function_query);
}
void UserDefinedSQLFunctionFactory::removeFunction(const String & function_name)
{
std::lock_guard lock(mutex);
function_name_to_create_query_map.erase(function_name);
}
void UserDefinedSQLFunctionFactory::removeAllFunctionsExcept(const Strings & function_names_to_keep)
{
boost::container::flat_set<std::string_view> names_set_to_keep{function_names_to_keep.begin(), function_names_to_keep.end()};
std::lock_guard lock(mutex);
for (auto it = function_name_to_create_query_map.begin(); it != function_name_to_create_query_map.end();)
{
auto current = it++;
if (!names_set_to_keep.contains(current->first))
function_name_to_create_query_map.erase(current);
}
}
std::unique_lock<std::recursive_mutex> UserDefinedSQLFunctionFactory::getLock() const
{
return std::unique_lock{mutex};
}
}

View File

@ -0,0 +1,70 @@
#pragma once
#include <unordered_map>
#include <mutex>
#include <Common/NamePrompter.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class BackupEntriesCollector;
class RestorerFromBackup;
/// Factory for SQLUserDefinedFunctions
class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory>
{
public:
static UserDefinedSQLFunctionFactory & instance();
/// Register function for function_name in factory for specified create_function_query.
bool registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists);
/// Unregister function for function_name.
bool unregisterFunction(const ContextMutablePtr & context, const String & function_name, bool throw_if_not_exists);
/// Get function create query for function_name. If no function registered with function_name throws exception.
ASTPtr get(const String & function_name) const;
/// Get function create query for function_name. If no function registered with function_name return nullptr.
ASTPtr tryGet(const String & function_name) const;
/// Check if function with function_name registered.
bool has(const String & function_name) const;
/// Get all user defined functions registered names.
std::vector<String> getAllRegisteredNames() const override;
/// Check whether any UDFs have been registered
bool empty() const;
/// Makes backup entries for all user-defined SQL functions.
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup) const;
/// Restores user-defined SQL functions from the backup.
void restore(RestorerFromBackup & restorer, const String & data_path_in_backup);
private:
friend class UserDefinedSQLObjectsLoaderFromDisk;
friend class UserDefinedSQLObjectsLoaderFromZooKeeper;
/// Checks that a specified function can be registered, throws an exception if not.
static void checkCanBeRegistered(const ContextPtr & context, const String & function_name, const IAST & create_function_query);
static void checkCanBeUnregistered(const ContextPtr & context, const String & function_name);
/// The following functions must be called only by the loader.
void setAllFunctions(const std::vector<std::pair<String, ASTPtr>> & new_functions);
std::vector<std::pair<String, ASTPtr>> getAllFunctions() const;
void setFunction(const String & function_name, const IAST & create_function_query);
void removeFunction(const String & function_name);
void removeAllFunctionsExcept(const Strings & function_names_to_keep);
std::unique_lock<std::recursive_mutex> getLock() const;
std::unordered_map<String, ASTPtr> function_name_to_create_query_map;
mutable std::recursive_mutex mutex;
};
}

View File

@ -8,7 +8,7 @@
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
namespace DB

View File

@ -0,0 +1,12 @@
#pragma once
namespace DB
{
enum class UserDefinedSQLObjectType
{
Function
};
}

View File

@ -0,0 +1,103 @@
#include <Functions/UserDefined/UserDefinedSQLObjectsBackup.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/IBackup.h>
#include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h>
#include <Backups/RestorerFromBackup.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Interpreters/Context.h>
#include <Parsers/ParserCreateFunctionQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_RESTORE_TABLE;
}
void backupUserDefinedSQLObjects(
BackupEntriesCollector & backup_entries_collector,
const String & data_path_in_backup,
UserDefinedSQLObjectType /* object_type */,
const std::vector<std::pair<String, ASTPtr>> & objects)
{
std::vector<std::pair<String, BackupEntryPtr>> backup_entries;
backup_entries.reserve(objects.size());
for (const auto & [function_name, create_function_query] : objects)
backup_entries.emplace_back(
escapeForFileName(function_name) + ".sql", std::make_shared<BackupEntryFromMemory>(queryToString(create_function_query)));
fs::path data_path_in_backup_fs{data_path_in_backup};
for (const auto & entry : backup_entries)
backup_entries_collector.addBackupEntry(data_path_in_backup_fs / entry.first, entry.second);
}
std::vector<std::pair<String, ASTPtr>>
restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type)
{
auto context = restorer.getContext();
auto backup = restorer.getBackup();
fs::path data_path_in_backup_fs{data_path_in_backup};
Strings filenames = backup->listFiles(data_path_in_backup);
if (filenames.empty())
return {}; /// Nothing to restore.
for (const auto & filename : filenames)
{
if (!filename.ends_with(".sql"))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"Cannot restore user-defined SQL objects: File name {} doesn't have the extension .sql",
String{data_path_in_backup_fs / filename});
}
}
std::vector<std::pair<String, ASTPtr>> res;
for (const auto & filename : filenames)
{
String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql"));
String function_name = unescapeForFileName(escaped_function_name);
String filepath = data_path_in_backup_fs / filename;
auto backup_entry = backup->readFile(filepath);
auto in = backup_entry->getReadBuffer();
String statement_def;
readStringUntilEOF(statement_def, *in);
ASTPtr ast;
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
ParserCreateFunctionQuery parser;
ast = parseQuery(
parser,
statement_def.data(),
statement_def.data() + statement_def.size(),
"in file " + filepath + " from backup " + backup->getName(),
0,
context->getSettingsRef().max_parser_depth);
break;
}
}
res.emplace_back(std::move(function_name), ast);
}
return res;
}
}

View File

@ -0,0 +1,25 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <base/types.h>
namespace DB
{
class BackupEntriesCollector;
class RestorerFromBackup;
enum class UserDefinedSQLObjectType;
class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
/// Makes backup entries to backup user-defined SQL objects.
void backupUserDefinedSQLObjects(
BackupEntriesCollector & backup_entries_collector,
const String & data_path_in_backup,
UserDefinedSQLObjectType object_type,
const std::vector<std::pair<String, ASTPtr>> & objects);
/// Restores user-defined SQL objects from the backup.
std::vector<std::pair<String, ASTPtr>>
restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_path_in_backup, UserDefinedSQLObjectType object_type);
}

View File

@ -0,0 +1,265 @@
#include "Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h"
#include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h"
#include "Functions/UserDefined/UserDefinedSQLObjectType.h"
#include <Common/StringUtils/StringUtils.h>
#include <Common/atomicRename.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ParserCreateFunctionQuery.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/Logger.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int DIRECTORY_DOESNT_EXIST;
extern const int FUNCTION_ALREADY_EXISTS;
extern const int UNKNOWN_FUNCTION;
}
namespace
{
/// Converts a path to an absolute path and append it with a separator.
String makeDirectoryPathCanonical(const String & directory_path)
{
auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path);
if (canonical_directory_path.has_filename())
canonical_directory_path += std::filesystem::path::preferred_separator;
return canonical_directory_path;
}
}
UserDefinedSQLObjectsLoaderFromDisk::UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_)
: global_context(global_context_)
, dir_path{makeDirectoryPathCanonical(dir_path_)}
, log{&Poco::Logger::get("UserDefinedSQLObjectsLoaderFromDisk")}
{
createDirectory();
}
ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name)
{
return tryLoadObject(object_type, object_name, getFilePath(object_type, object_name), /* check_file_exists= */ true);
}
ASTPtr UserDefinedSQLObjectsLoaderFromDisk::tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & path, bool check_file_exists)
{
LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(object_name), path);
try
{
if (check_file_exists && !fs::exists(path))
return nullptr;
/// There is .sql file with user defined object creation statement.
ReadBufferFromFile in(path);
String object_create_query;
readStringUntilEOF(object_create_query, in);
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
ParserCreateFunctionQuery parser;
ASTPtr ast = parseQuery(
parser,
object_create_query.data(),
object_create_query.data() + object_create_query.size(),
"",
0,
global_context->getSettingsRef().max_parser_depth);
UserDefinedSQLFunctionFactory::checkCanBeRegistered(global_context, object_name, *ast);
return ast;
}
}
}
catch (...)
{
tryLogCurrentException(log, fmt::format("while loading user defined SQL object {} from path {}", backQuote(object_name), path));
return nullptr; /// Failed to load this sql object, will ignore it
}
}
void UserDefinedSQLObjectsLoaderFromDisk::loadObjects()
{
if (!objects_loaded)
loadObjectsImpl();
}
void UserDefinedSQLObjectsLoaderFromDisk::reloadObjects()
{
loadObjectsImpl();
}
void UserDefinedSQLObjectsLoaderFromDisk::loadObjectsImpl()
{
LOG_INFO(log, "Loading user defined objects from {}", dir_path);
createDirectory();
std::vector<std::pair<String, ASTPtr>> function_names_and_queries;
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it)
{
if (it->isDirectory())
continue;
const String & file_name = it.name();
if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql"))
continue;
size_t prefix_length = strlen("function_");
size_t suffix_length = strlen(".sql");
String function_name = unescapeForFileName(file_name.substr(prefix_length, file_name.length() - prefix_length - suffix_length));
if (function_name.empty())
continue;
ASTPtr ast = tryLoadObject(UserDefinedSQLObjectType::Function, function_name, dir_path + it.name(), /* check_file_exists= */ false);
if (ast)
function_names_and_queries.emplace_back(function_name, ast);
}
UserDefinedSQLFunctionFactory::instance().setAllFunctions(function_names_and_queries);
objects_loaded = true;
LOG_DEBUG(log, "User defined objects loaded");
}
void UserDefinedSQLObjectsLoaderFromDisk::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name)
{
createDirectory();
auto ast = tryLoadObject(object_type, object_name);
auto & factory = UserDefinedSQLFunctionFactory::instance();
if (ast)
factory.setFunction(object_name, *ast);
else
factory.removeFunction(object_name);
}
void UserDefinedSQLObjectsLoaderFromDisk::createDirectory()
{
std::error_code create_dir_error_code;
fs::create_directories(dir_path, create_dir_error_code);
if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code)
throw Exception("Couldn't create directory " + dir_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST);
}
bool UserDefinedSQLObjectsLoaderFromDisk::storeObject(
UserDefinedSQLObjectType object_type,
const String & object_name,
const IAST & create_object_query,
bool throw_if_exists,
bool replace_if_exists,
const Settings & settings)
{
String file_path = getFilePath(object_type, object_name);
LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path);
if (fs::exists(file_path))
{
if (throw_if_exists)
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User-defined function '{}' already exists", object_name);
else if (!replace_if_exists)
return false;
}
WriteBufferFromOwnString create_statement_buf;
formatAST(create_object_query, create_statement_buf, false);
writeChar('\n', create_statement_buf);
String create_statement = create_statement_buf.str();
String temp_file_path = file_path + ".tmp";
try
{
WriteBufferFromFile out(temp_file_path, create_statement.size());
writeString(create_statement, out);
out.next();
if (settings.fsync_metadata)
out.sync();
out.close();
if (replace_if_exists)
fs::rename(temp_file_path, file_path);
else
renameNoReplace(temp_file_path, file_path);
}
catch (...)
{
fs::remove(temp_file_path);
throw;
}
LOG_TRACE(log, "Object {} stored", backQuote(object_name));
return true;
}
bool UserDefinedSQLObjectsLoaderFromDisk::removeObject(
UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists)
{
String file_path = getFilePath(object_type, object_name);
LOG_DEBUG(log, "Removing user defined object {} stored in file {}", backQuote(object_name), file_path);
bool existed = fs::remove(file_path);
if (!existed)
{
if (throw_if_not_exists)
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "User-defined function '{}' doesn't exist", object_name);
else
return false;
}
LOG_TRACE(log, "Object {} removed", backQuote(object_name));
return true;
}
String UserDefinedSQLObjectsLoaderFromDisk::getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const
{
String file_path;
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql";
break;
}
}
return file_path;
}
}

View File

@ -0,0 +1,46 @@
#pragma once
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
/// Loads user-defined sql objects from a specified folder.
class UserDefinedSQLObjectsLoaderFromDisk : public IUserDefinedSQLObjectsLoader
{
public:
UserDefinedSQLObjectsLoaderFromDisk(const ContextPtr & global_context_, const String & dir_path_);
void loadObjects() override;
void reloadObjects() override;
void reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) override;
bool storeObject(
UserDefinedSQLObjectType object_type,
const String & object_name,
const IAST & create_object_query,
bool throw_if_exists,
bool replace_if_exists,
const Settings & settings) override;
bool removeObject(UserDefinedSQLObjectType object_type, const String & object_name, bool throw_if_not_exists) override;
private:
void createDirectory();
void loadObjectsImpl();
ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name);
ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & file_path, bool check_file_exists);
String getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) const;
ContextPtr global_context;
String dir_path;
Poco::Logger * log;
std::atomic<bool> objects_loaded = false;
};
}

View File

@ -0,0 +1,21 @@
#include <Functions/UserDefined/createUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
std::unique_ptr<IUserDefinedSQLObjectsLoader> createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context)
{
const auto & config = global_context->getConfigRef();
String default_path = fs::path{global_context->getPath()} / "user_defined/";
String path = config.getString("user_defined_path", default_path);
return std::make_unique<UserDefinedSQLObjectsLoaderFromDisk>(global_context, path);
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace DB
{
class IUserDefinedSQLObjectsLoader;
std::unique_ptr<IUserDefinedSQLObjectsLoader> createUserDefinedSQLObjectsLoader(const ContextMutablePtr & global_context);
}

View File

@ -52,7 +52,7 @@
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
namespace DB
@ -91,15 +91,35 @@ static size_t getTypeDepth(const DataTypePtr & type)
return 0;
}
template <typename T>
static bool decimalEqualsFloat(Field field, Float64 float_value)
{
auto decimal_field = field.get<DecimalField<T>>();
auto decimal_to_float = DecimalUtils::convertTo<Float64>(decimal_field.getValue(), decimal_field.getScale());
return decimal_to_float == float_value;
}
/// Applies stricter rules than convertFieldToType:
/// Doesn't allow :
/// - loss of precision with `Decimals`
/// - loss of precision converting to Decimal
static bool convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type, Field & result_value)
{
result_value = convertFieldToType(from_value, to_type);
if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType()))
return applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value);
if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType()))
{
/// Convert back to Float64 and compare
if (result_value.getType() == Field::Types::Decimal32)
return decimalEqualsFloat<Decimal32>(result_value, from_value.get<Float64>());
if (result_value.getType() == Field::Types::Decimal64)
return decimalEqualsFloat<Decimal64>(result_value, from_value.get<Float64>());
if (result_value.getType() == Field::Types::Decimal128)
return decimalEqualsFloat<Decimal128>(result_value, from_value.get<Float64>());
if (result_value.getType() == Field::Types::Decimal256)
return decimalEqualsFloat<Decimal256>(result_value, from_value.get<Float64>());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName());
}
return true;
}

View File

@ -57,7 +57,9 @@
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
#include <Interpreters/EmbeddedDictionaries.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/createUserDefinedSQLObjectsLoader.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/InterserverCredentials.h>
@ -186,7 +188,6 @@ struct ContextSharedPart : boost::noncopyable
String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function.
String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries.
String user_scripts_path; /// Path to the directory with user provided scripts.
String user_defined_path; /// Path to the directory with user defined objects.
ConfigurationPtr config; /// Global configuration settings.
String tmp_path; /// Path to the temporary files that occur when processing the request.
@ -194,16 +195,18 @@ struct ContextSharedPart : boost::noncopyable
mutable std::unique_ptr<EmbeddedDictionaries> embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization.
mutable std::unique_ptr<ExternalDictionariesLoader> external_dictionaries_loader;
mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader;
scope_guard models_repository_guard;
ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr;
scope_guard dictionaries_xmls;
mutable std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> external_user_defined_executable_functions_loader;
ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository = nullptr;
scope_guard user_defined_executable_functions_xmls;
mutable std::unique_ptr<IUserDefinedSQLObjectsLoader> user_defined_sql_objects_loader;
#if USE_NLP
mutable std::optional<SynonymsExtensions> synonyms_extensions;
mutable std::optional<Lemmatizers> lemmatizers;
@ -420,6 +423,8 @@ struct ContextSharedPart : boost::noncopyable
external_dictionaries_loader->enablePeriodicUpdates(false);
if (external_user_defined_executable_functions_loader)
external_user_defined_executable_functions_loader->enablePeriodicUpdates(false);
if (user_defined_sql_objects_loader)
user_defined_sql_objects_loader->stopWatching();
Session::shutdownNamedSessions();
@ -450,6 +455,7 @@ struct ContextSharedPart : boost::noncopyable
std::unique_ptr<EmbeddedDictionaries> delete_embedded_dictionaries;
std::unique_ptr<ExternalDictionariesLoader> delete_external_dictionaries_loader;
std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> delete_external_user_defined_executable_functions_loader;
std::unique_ptr<IUserDefinedSQLObjectsLoader> delete_user_defined_sql_objects_loader;
std::unique_ptr<BackgroundSchedulePool> delete_buffer_flush_schedule_pool;
std::unique_ptr<BackgroundSchedulePool> delete_schedule_pool;
std::unique_ptr<BackgroundSchedulePool> delete_distributed_schedule_pool;
@ -488,6 +494,7 @@ struct ContextSharedPart : boost::noncopyable
delete_embedded_dictionaries = std::move(embedded_dictionaries);
delete_external_dictionaries_loader = std::move(external_dictionaries_loader);
delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader);
delete_user_defined_sql_objects_loader = std::move(user_defined_sql_objects_loader);
delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool);
delete_schedule_pool = std::move(schedule_pool);
delete_distributed_schedule_pool = std::move(distributed_schedule_pool);
@ -515,6 +522,7 @@ struct ContextSharedPart : boost::noncopyable
delete_embedded_dictionaries.reset();
delete_external_dictionaries_loader.reset();
delete_external_user_defined_executable_functions_loader.reset();
delete_user_defined_sql_objects_loader.reset();
delete_ddl_worker.reset();
delete_buffer_flush_schedule_pool.reset();
delete_schedule_pool.reset();
@ -658,12 +666,6 @@ String Context::getUserScriptsPath() const
return shared->user_scripts_path;
}
String Context::getUserDefinedPath() const
{
auto lock = getLock();
return shared->user_defined_path;
}
Strings Context::getWarnings() const
{
Strings common_warnings;
@ -726,9 +728,6 @@ void Context::setPath(const String & path)
if (shared->user_scripts_path.empty())
shared->user_scripts_path = shared->path + "user_scripts/";
if (shared->user_defined_path.empty())
shared->user_defined_path = shared->path + "user_defined/";
}
VolumePtr Context::setTemporaryStorage(const String & path, const String & policy_name, size_t max_size)
@ -804,12 +803,6 @@ void Context::setUserScriptsPath(const String & path)
shared->user_scripts_path = path;
}
void Context::setUserDefinedPath(const String & path)
{
auto lock = getLock();
shared->user_defined_path = path;
}
void Context::addWarningMessage(const String & msg) const
{
auto lock = getLock();
@ -1652,6 +1645,22 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr
shared->user_defined_executable_functions_xmls = external_user_defined_executable_functions_loader.addConfigRepository(std::move(repository));
}
const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() const
{
auto lock = getLock();
if (!shared->user_defined_sql_objects_loader)
shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
return *shared->user_defined_sql_objects_loader;
}
IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader()
{
auto lock = getLock();
if (!shared->user_defined_sql_objects_loader)
shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext());
return *shared->user_defined_sql_objects_loader;
}
#if USE_NLP
SynonymsExtensions & Context::getSynonymsExtensions() const
@ -3410,7 +3419,7 @@ void Context::initializeBackgroundExecutorsIfNeeded()
size_t background_merges_mutations_concurrency_ratio = 2;
if (config.has("background_merges_mutations_concurrency_ratio"))
background_merges_mutations_concurrency_ratio = config.getUInt64("background_merges_mutations_concurrency_ratio");
else if (config.has("profiles.default.background_pool_size"))
else if (config.has("profiles.default.background_merges_mutations_concurrency_ratio"))
background_merges_mutations_concurrency_ratio = config.getUInt64("profiles.default.background_merges_mutations_concurrency_ratio");
size_t background_move_pool_size = 8;

View File

@ -54,6 +54,7 @@ enum class RowPolicyFilterType;
class EmbeddedDictionaries;
class ExternalDictionariesLoader;
class ExternalUserDefinedExecutableFunctionsLoader;
class IUserDefinedSQLObjectsLoader;
class InterserverCredentials;
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
class InterserverIOHandler;
@ -435,7 +436,6 @@ public:
String getUserFilesPath() const;
String getDictionariesLibPath() const;
String getUserScriptsPath() const;
String getUserDefinedPath() const;
/// A list of warnings about server configuration to place in `system.warnings` table.
Strings getWarnings() const;
@ -450,7 +450,6 @@ public:
void setUserFilesPath(const String & path);
void setDictionariesLibPath(const String & path);
void setUserScriptsPath(const String & path);
void setUserDefinedPath(const String & path);
void addWarningMessage(const String & msg) const;
@ -653,16 +652,19 @@ public:
/// Returns the current constraints (can return null).
std::shared_ptr<const SettingsConstraintsAndProfileIDs> getSettingsConstraintsAndCurrentProfiles() const;
const EmbeddedDictionaries & getEmbeddedDictionaries() const;
const ExternalDictionariesLoader & getExternalDictionariesLoader() const;
const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const;
EmbeddedDictionaries & getEmbeddedDictionaries();
ExternalDictionariesLoader & getExternalDictionariesLoader();
ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked();
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader();
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked();
const EmbeddedDictionaries & getEmbeddedDictionaries() const;
EmbeddedDictionaries & getEmbeddedDictionaries();
void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const;
void loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & config);
const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const;
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader();
ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked();
const IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader() const;
IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader();
void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config);
#if USE_NLP

View File

@ -1088,23 +1088,8 @@ TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const Qualif
TableNamesSet & dependent = it->second.dependent_database_objects;
if (!dependent.empty())
{
if (check_dependencies && !is_drop_database)
throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}",
removing_table, fmt::join(dependent, ", "));
/// For DROP DATABASE we should ignore dependent tables from the same database.
/// TODO unload tables in reverse topological order and remove this code
if (check_dependencies)
{
TableNames from_other_databases;
for (const auto & table : dependent)
if (table.database != removing_table.database)
from_other_databases.push_back(table);
if (!from_other_databases.empty())
throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}",
removing_table, fmt::join(from_other_databases, ", "));
}
checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database);
for (const auto & table : dependent)
{
@ -1125,7 +1110,7 @@ TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const Qualif
return dependencies;
}
void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id) const
void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database) const
{
QualifiedTableName removing_table = table_id.getQualifiedName();
std::lock_guard lock{databases_mutex};
@ -1134,9 +1119,28 @@ void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id
return;
const TableNamesSet & dependent = it->second.dependent_database_objects;
checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database);
}
void DatabaseCatalog::checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database)
{
if (!is_drop_database)
{
if (!dependent.empty())
throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}",
table_id.getNameForLogs(), fmt::join(dependent, ", "));
removing_table, fmt::join(dependent, ", "));
}
/// For DROP DATABASE we should ignore dependent tables from the same database.
/// TODO unload tables in reverse topological order and remove this code
TableNames from_other_databases;
for (const auto & table : dependent)
if (table.database != removing_table.database)
from_other_databases.push_back(table);
if (!from_other_databases.empty())
throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}",
removing_table, fmt::join(from_other_databases, ", "));
}
void DatabaseCatalog::updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies)

View File

@ -229,7 +229,7 @@ public:
TableNamesSet tryRemoveLoadingDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database = false);
TableNamesSet tryRemoveLoadingDependenciesUnlocked(const QualifiedTableName & removing_table, bool check_dependencies, bool is_drop_database = false) TSA_REQUIRES(databases_mutex);
void checkTableCanBeRemovedOrRenamed(const StorageID & table_id) const;
void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database = false) const;
void updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies);
@ -245,6 +245,7 @@ private:
void shutdownImpl();
static void checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database);
struct UUIDToStorageMapPart
{

View File

@ -1,16 +1,11 @@
#include <Interpreters/InterpreterCreateFunctionQuery.h>
#include <Access/ContextAccess.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
@ -18,13 +13,11 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_CREATE_RECURSIVE_FUNCTION;
extern const int UNSUPPORTED_METHOD;
extern const int INCORRECT_QUERY;
}
BlockIO InterpreterCreateFunctionQuery::execute()
{
FunctionNameNormalizer().visit(query_ptr.get());
ASTCreateFunctionQuery & create_function_query = query_ptr->as<ASTCreateFunctionQuery &>();
AccessRightsElements access_rights_elements;
@ -33,80 +26,27 @@ BlockIO InterpreterCreateFunctionQuery::execute()
if (create_function_query.or_replace)
access_rights_elements.emplace_back(AccessType::DROP_FUNCTION);
auto current_context = getContext();
if (!create_function_query.cluster.empty())
{
if (current_context->getUserDefinedSQLObjectsLoader().isReplicated())
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because used-defined functions are replicated automatically");
DDLQueryOnClusterParams params;
params.access_to_check = std::move(access_rights_elements);
return executeDDLQueryOnCluster(query_ptr, getContext(), params);
return executeDDLQueryOnCluster(query_ptr, current_context, params);
}
auto current_context = getContext();
current_context->checkAccess(access_rights_elements);
auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance();
auto function_name = create_function_query.getFunctionName();
bool throw_if_exists = !create_function_query.if_not_exists && !create_function_query.or_replace;
bool replace_if_exists = create_function_query.or_replace;
bool if_not_exists = create_function_query.if_not_exists;
bool replace = create_function_query.or_replace;
create_function_query.if_not_exists = false;
create_function_query.or_replace = false;
validateFunction(create_function_query.function_core, function_name);
user_defined_function_factory.registerFunction(current_context, function_name, query_ptr, replace, if_not_exists, persist_function);
UserDefinedSQLFunctionFactory::instance().registerFunction(current_context, function_name, query_ptr, throw_if_exists, replace_if_exists);
return {};
}
void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const String & name)
{
ASTFunction * lambda_function = function->as<ASTFunction>();
if (!lambda_function)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected function, got: {}", function->formatForErrorMessage());
auto & lambda_function_expression_list = lambda_function->arguments->children;
if (lambda_function_expression_list.size() != 2)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have arguments and body");
const ASTFunction * tuple_function_arguments = lambda_function_expression_list[0]->as<ASTFunction>();
if (!tuple_function_arguments || !tuple_function_arguments->arguments)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid arguments");
std::unordered_set<String> arguments;
for (const auto & argument : tuple_function_arguments->arguments->children)
{
const auto * argument_identifier = argument->as<ASTIdentifier>();
if (!argument_identifier)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda argument must be identifier");
const auto & argument_name = argument_identifier->name();
auto [_, inserted] = arguments.insert(argument_name);
if (!inserted)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Identifier {} already used as function parameter", argument_name);
}
ASTPtr function_body = lambda_function_expression_list[1];
if (!function_body)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Lambda must have valid function body");
validateFunctionRecursiveness(function_body, name);
}
void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create)
{
for (const auto & child : node->children)
{
auto function_name_opt = tryGetFunctionName(child);
if (function_name_opt && function_name_opt.value() == function_to_create)
throw Exception(ErrorCodes::CANNOT_CREATE_RECURSIVE_FUNCTION, "You cannot create recursive function");
validateFunctionRecursiveness(child, function_to_create);
}
}
}

View File

@ -8,24 +8,18 @@ namespace DB
class Context;
class InterpreterCreateFunctionQuery : public IInterpreter, WithContext
class InterpreterCreateFunctionQuery : public IInterpreter, WithMutableContext
{
public:
InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextPtr context_, bool persist_function_)
: WithContext(context_)
, query_ptr(query_ptr_)
, persist_function(persist_function_) {}
InterpreterCreateFunctionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
: WithMutableContext(context_), query_ptr(query_ptr_)
{
}
BlockIO execute() override;
void setInternal(bool internal_);
private:
static void validateFunction(ASTPtr function, const String & name);
static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create);
ASTPtr query_ptr;
bool persist_function;
};
}

View File

@ -582,6 +582,15 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
if (col_decl.default_expression)
{
if (context_->hasQueryContext() && context_->getQueryContext().get() == context_.get())
{
/// Normalize query only for original CREATE query, not on metadata loading.
/// And for CREATE query we can pass local context, because result will not change after restart.
NormalizeAndEvaluateConstantsVisitor::Data visitor_data{context_};
NormalizeAndEvaluateConstantsVisitor visitor(visitor_data);
visitor.visit(col_decl.default_expression);
}
ASTPtr default_expr =
col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as<ASTLiteral>()->value.isNull() ?
std::make_shared<ASTLiteral>(DataTypeFactory::instance().get(col_decl.type)->getDefault()) :
@ -664,6 +673,9 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
if (create.as_table_function && (create.columns_list->indices || create.columns_list->constraints))
throw Exception("Indexes and constraints are not supported for table functions", ErrorCodes::INCORRECT_QUERY);
/// Dictionaries have dictionary_attributes_list instead of columns_list
assert(!create.is_dictionary);
if (create.columns_list->columns)
{
properties.columns = getColumnsDescription(*create.columns_list->columns, getContext(), create.attach);
@ -725,6 +737,14 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
}
else if (create.is_dictionary)
{
if (!create.dictionary || !create.dictionary->source)
return {};
/// Evaluate expressions (like currentDatabase() or tcpPort()) in dictionary source definition.
NormalizeAndEvaluateConstantsVisitor::Data visitor_data{getContext()};
NormalizeAndEvaluateConstantsVisitor visitor(visitor_data);
visitor.visit(create.dictionary->source->ptr());
return {};
}
else if (!create.storage || !create.storage->engine)

View File

@ -1,17 +1,22 @@
#include <Parsers/ASTDropFunctionQuery.h>
#include <Access/ContextAccess.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Interpreters/InterpreterDropFunctionQuery.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
}
BlockIO InterpreterDropFunctionQuery::execute()
{
FunctionNameNormalizer().visit(query_ptr.get());
@ -20,17 +25,23 @@ BlockIO InterpreterDropFunctionQuery::execute()
AccessRightsElements access_rights_elements;
access_rights_elements.emplace_back(AccessType::DROP_FUNCTION);
auto current_context = getContext();
if (!drop_function_query.cluster.empty())
{
if (current_context->getUserDefinedSQLObjectsLoader().isReplicated())
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because used-defined functions are replicated automatically");
DDLQueryOnClusterParams params;
params.access_to_check = std::move(access_rights_elements);
return executeDDLQueryOnCluster(query_ptr, getContext(), params);
return executeDDLQueryOnCluster(query_ptr, current_context, params);
}
auto current_context = getContext();
current_context->checkAccess(access_rights_elements);
UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, drop_function_query.if_exists);
bool throw_if_not_exists = !drop_function_query.if_exists;
UserDefinedSQLFunctionFactory::instance().unregisterFunction(current_context, drop_function_query.function_name, throw_if_not_exists);
return {};
}

View File

@ -233,6 +233,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
else
table->checkTableCanBeDropped();
/// Check dependencies before shutting table down
if (context_->getSettingsRef().check_table_dependencies)
DatabaseCatalog::instance().checkTableCanBeRemovedOrRenamed(table_id, is_drop_or_detach_database);
table->flushAndShutdown();
TableExclusiveLockHolder table_lock;

View File

@ -296,7 +296,7 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, ContextMut
}
else if (query->as<ASTCreateFunctionQuery>())
{
return std::make_unique<InterpreterCreateFunctionQuery>(query, context, true /*persist_function*/);
return std::make_unique<InterpreterCreateFunctionQuery>(query, context);
}
else if (query->as<ASTDropFunctionQuery>())
{

View File

@ -12,7 +12,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.h>
#include <Interpreters/EmbeddedDictionaries.h>
#include <Interpreters/ActionLocksManager.h>
#include <Interpreters/InterpreterDropQuery.h>

View File

@ -23,7 +23,6 @@
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -35,6 +34,7 @@
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Functions/FunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
#include <Storages/IStorage.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>

View File

@ -24,13 +24,14 @@
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/UserDefinedSQLFunctionVisitor.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/replaceForPositionalArguments.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionVisitor.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>

View File

@ -1,168 +0,0 @@
#include "UserDefinedSQLFunctionFactory.h"
#include <Common/quoteString.h>
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Interpreters/UserDefinedSQLObjectsLoader.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace ErrorCodes
{
extern const int FUNCTION_ALREADY_EXISTS;
extern const int UNKNOWN_FUNCTION;
extern const int CANNOT_DROP_FUNCTION;
}
UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance()
{
static UserDefinedSQLFunctionFactory result;
return result;
}
void UserDefinedSQLFunctionFactory::registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist)
{
if (FunctionFactory::instance().hasNameOrAlias(function_name))
{
if (if_not_exists)
return;
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name);
}
if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
{
if (if_not_exists)
return;
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name);
}
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
{
if (if_not_exists)
return;
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name);
}
std::lock_guard lock(mutex);
auto [it, inserted] = function_name_to_create_query.emplace(function_name, create_function_query);
if (!inserted)
{
if (if_not_exists)
return;
if (replace)
it->second = create_function_query;
else
throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS,
"The function name '{}' is not unique",
function_name);
}
if (persist)
{
try
{
UserDefinedSQLObjectsLoader::instance().storeObject(context, UserDefinedSQLObjectType::Function, function_name, *create_function_query, replace);
}
catch (Exception & exception)
{
function_name_to_create_query.erase(it);
exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name)));
throw;
}
}
}
void UserDefinedSQLFunctionFactory::unregisterFunction(ContextPtr context, const String & function_name, bool if_exists)
{
if (FunctionFactory::instance().hasNameOrAlias(function_name) ||
AggregateFunctionFactory::instance().hasNameOrAlias(function_name))
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name);
if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context))
throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name);
std::lock_guard lock(mutex);
auto it = function_name_to_create_query.find(function_name);
if (it == function_name_to_create_query.end())
{
if (if_exists)
return;
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
"The function name '{}' is not registered",
function_name);
}
try
{
UserDefinedSQLObjectsLoader::instance().removeObject(context, UserDefinedSQLObjectType::Function, function_name);
}
catch (Exception & exception)
{
exception.addMessage(fmt::format("while removing user defined function {} from disk", backQuote(function_name)));
throw;
}
function_name_to_create_query.erase(it);
}
ASTPtr UserDefinedSQLFunctionFactory::get(const String & function_name) const
{
std::lock_guard lock(mutex);
auto it = function_name_to_create_query.find(function_name);
if (it == function_name_to_create_query.end())
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
"The function name '{}' is not registered",
function_name);
return it->second;
}
ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) const
{
std::lock_guard lock(mutex);
auto it = function_name_to_create_query.find(function_name);
if (it == function_name_to_create_query.end())
return nullptr;
return it->second;
}
bool UserDefinedSQLFunctionFactory::has(const String & function_name) const
{
return tryGet(function_name) != nullptr;
}
std::vector<std::string> UserDefinedSQLFunctionFactory::getAllRegisteredNames() const
{
std::vector<std::string> registered_names;
std::lock_guard lock(mutex);
registered_names.reserve(function_name_to_create_query.size());
for (const auto & [name, _] : function_name_to_create_query)
registered_names.emplace_back(name);
return registered_names;
}
bool UserDefinedSQLFunctionFactory::empty() const
{
std::lock_guard lock(mutex);
return function_name_to_create_query.empty();
}
}

View File

@ -1,54 +0,0 @@
#pragma once
#include <unordered_map>
#include <mutex>
#include <Common/NamePrompter.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Factory for SQLUserDefinedFunctions
class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory>
{
public:
static UserDefinedSQLFunctionFactory & instance();
/** Register function for function_name in factory for specified create_function_query.
* If function exists and if_not_exists = false and replace = false throws exception.
* If replace = true and sql user defined function with function_name already exists replace it with create_function_query.
* If persist = true persist function on disk.
*/
void registerFunction(ContextPtr context, const String & function_name, ASTPtr create_function_query, bool replace, bool if_not_exists, bool persist);
/** Unregister function for function_name.
* If if_exists = true then do not throw exception if function is not registered.
* If if_exists = false then throw exception if function is not registered.
*/
void unregisterFunction(ContextPtr context, const String & function_name, bool if_exists);
/// Get function create query for function_name. If no function registered with function_name throws exception.
ASTPtr get(const String & function_name) const;
/// Get function create query for function_name. If no function registered with function_name return nullptr.
ASTPtr tryGet(const String & function_name) const;
/// Check if function with function_name registered.
bool has(const String & function_name) const;
/// Get all user defined functions registered names.
std::vector<String> getAllRegisteredNames() const override;
/// Check whether any UDFs have been registered
bool empty() const;
private:
std::unordered_map<String, ASTPtr> function_name_to_create_query;
mutable std::mutex mutex;
};
}

View File

@ -1,184 +0,0 @@
#include "UserDefinedSQLObjectsLoader.h"
#include <filesystem>
#include <Common/escapeForFileName.h>
#include <Common/quoteString.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterCreateFunctionQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ASTCreateFunctionQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ParserCreateFunctionQuery.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int OBJECT_ALREADY_STORED_ON_DISK;
extern const int OBJECT_WAS_NOT_STORED_ON_DISK;
}
UserDefinedSQLObjectsLoader & UserDefinedSQLObjectsLoader::instance()
{
static UserDefinedSQLObjectsLoader ret;
return ret;
}
UserDefinedSQLObjectsLoader::UserDefinedSQLObjectsLoader()
: log(&Poco::Logger::get("UserDefinedSQLObjectsLoader"))
{}
void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view name, const String & path)
{
auto name_ref = StringRef(name.data(), name.size());
LOG_DEBUG(log, "Loading user defined object {} from file {}", backQuote(name_ref), path);
/// There is .sql file with user defined object creation statement.
ReadBufferFromFile in(path);
String object_create_query;
readStringUntilEOF(object_create_query, in);
try
{
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
ParserCreateFunctionQuery parser;
ASTPtr ast = parseQuery(
parser,
object_create_query.data(),
object_create_query.data() + object_create_query.size(),
"in file " + path,
0,
context->getSettingsRef().max_parser_depth);
InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/);
interpreter.execute();
}
}
}
catch (Exception & e)
{
e.addMessage(fmt::format("while loading user defined objects {} from path {}", backQuote(name_ref), path));
throw;
}
}
void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context)
{
if (unlikely(!enable_persistence))
return;
LOG_DEBUG(log, "Loading user defined objects");
String dir_path = context->getUserDefinedPath();
Poco::DirectoryIterator dir_end;
for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it)
{
if (it->isDirectory())
continue;
const std::string & file_name = it.name();
/// For '.svn', '.gitignore' directory and similar.
if (file_name.at(0) == '.')
continue;
if (!startsWith(file_name, "function_") || !endsWith(file_name, ".sql"))
continue;
std::string_view object_name = file_name;
object_name.remove_prefix(strlen("function_"));
object_name.remove_suffix(strlen(".sql"));
if (object_name.empty())
continue;
loadUserDefinedObject(context, UserDefinedSQLObjectType::Function, object_name, dir_path + it.name());
}
}
void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace)
{
if (unlikely(!enable_persistence))
return;
String dir_path = context->getUserDefinedPath();
String file_path;
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql";
}
}
if (!replace && std::filesystem::exists(file_path))
throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path));
LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path);
WriteBufferFromOwnString create_statement_buf;
formatAST(ast, create_statement_buf, false);
writeChar('\n', create_statement_buf);
String create_statement = create_statement_buf.str();
WriteBufferFromFile out(file_path, create_statement.size());
writeString(create_statement, out);
out.next();
if (context->getSettingsRef().fsync_metadata)
out.sync();
out.close();
LOG_DEBUG(log, "Stored object {}", backQuote(object_name));
}
void UserDefinedSQLObjectsLoader::removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name)
{
if (unlikely(!enable_persistence))
return;
String dir_path = context->getUserDefinedPath();
LOG_DEBUG(log, "Removing file for user defined object {} from {}", backQuote(object_name), dir_path);
std::filesystem::path file_path;
switch (object_type)
{
case UserDefinedSQLObjectType::Function:
{
file_path = dir_path + "function_" + escapeForFileName(object_name) + ".sql";
}
}
if (!std::filesystem::exists(file_path))
throw Exception(ErrorCodes::OBJECT_WAS_NOT_STORED_ON_DISK, "User defined object {} was not stored on disk", backQuote(file_path.string()));
std::filesystem::remove(file_path);
}
void UserDefinedSQLObjectsLoader::enable(bool enable_persistence_)
{
enable_persistence = enable_persistence_;
}
}

View File

@ -1,37 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST.h>
#include <boost/noncopyable.hpp>
namespace DB
{
enum class UserDefinedSQLObjectType
{
Function
};
class UserDefinedSQLObjectsLoader : private boost::noncopyable
{
public:
static UserDefinedSQLObjectsLoader & instance();
UserDefinedSQLObjectsLoader();
void loadObjects(ContextPtr context);
void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace);
void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name);
/// For ClickHouse local if path is not set we can disable loader.
void enable(bool enable_persistence);
private:
void loadUserDefinedObject(ContextPtr context, UserDefinedSQLObjectType object_type, std::string_view object_name, const String & file_path);
Poco::Logger * log;
bool enable_persistence = true;
};
}

View File

@ -109,6 +109,21 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal<T> &
return DecimalField<T>(value, type.getScale());
}
template <typename From, typename T>
Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal<T> & type)
{
From value = from.get<From>();
if (!type.canStoreWhole(value))
throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
//String sValue = convertFieldToString(from);
//int fromScale = sValue.length()- sValue.find('.') - 1;
UInt32 scale = type.getScale();
auto scaled_value = convertToDecimal<DataTypeNumber<From>, DataTypeDecimal<T>>(value, scale);
return DecimalField<T>(scaled_value, scale);
}
template <typename To>
Field convertDecimalType(const Field & from, const To & type)
{
@ -135,6 +150,9 @@ Field convertDecimalType(const Field & from, const To & type)
if (from.getType() == Field::Types::Decimal128)
return convertDecimalToDecimalType<Decimal128>(from, type);
if (from.getType() == Field::Types::Float64)
return convertFloatToDecimalType<Float64>(from, type);
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
type.getName(), from.getType());
}

View File

@ -549,15 +549,9 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (insert_query)
{
if (insert_query->table_id)
{
insert_query->table_id = context->resolveStorageID(insert_query->table_id);
LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName());
}
else if (auto table = insert_query->getTable(); !table.empty())
{
insert_query->table_id = context->resolveStorageID(StorageID{insert_query->getDatabase(), table});
LOG_DEBUG(&Poco::Logger::get("executeQuery"), "2) database: {}", insert_query->table_id.getDatabaseName());
}
}
if (insert_query && insert_query->select)

View File

@ -214,7 +214,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
seen_columns.assign(num_columns, false);
nested_prefix_length = 0;
readRowStart();
readRowStart(columns);
readJSONObject(columns);
const auto & header = getPort().getHeader();

View File

@ -48,7 +48,7 @@ private:
void readJSONObject(MutableColumns & columns);
void readNestedData(const String & name, MutableColumns & columns);
virtual void readRowStart() {}
virtual void readRowStart(MutableColumns &) {}
virtual bool checkEndOfData(bool is_first_row);
const FormatSettings format_settings;
@ -66,10 +66,6 @@ private:
/// the nested column names are 'n.i' and 'n.s' and the nested prefix is 'n.'
size_t nested_prefix_length = 0;
/// Set of columns for which the values were read. The rest will be filled with default values.
std::vector<UInt8> read_columns;
/// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
std::vector<UInt8> seen_columns;
/// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true
/// for row like {..., "non-nullable column name" : null, ...}
@ -85,6 +81,12 @@ private:
bool yield_strings;
protected:
/// Set of columns for which the values were read. The rest will be filled with default values.
std::vector<UInt8> read_columns;
/// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name.
std::vector<UInt8> seen_columns;
/// This flag is needed to know if data is in square brackets.
bool data_in_square_brackets = false;
};

View File

@ -2,12 +2,39 @@
#include <Formats/JSONUtils.h>
#include <Formats/FormatFactory.h>
#include <Formats/EscapingRuleUtils.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
std::optional<size_t> getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & format_settings)
{
if (format_settings.json_object_each_row.column_for_object_name.empty())
return std::nullopt;
if (!header.has(format_settings.json_object_each_row.column_for_object_name))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Column name '{}' from setting format_json_object_each_row_column_for_object_name doesn't exists in header",
format_settings.json_object_each_row.column_for_object_name);
size_t index = header.getPositionByName(format_settings.json_object_each_row.column_for_object_name);
if (!isStringOrFixedString(header.getDataTypes()[index]))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Column '{}' from setting json_object_each_row_column_for_object_name must have String type",
format_settings.json_object_each_row.column_for_object_name);
return index;
}
JSONObjectEachRowInputFormat::JSONObjectEachRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
: JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false)
: JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, format_settings_))
{
}
@ -16,9 +43,15 @@ void JSONObjectEachRowInputFormat::readPrefix()
JSONUtils::skipObjectStart(*in);
}
void JSONObjectEachRowInputFormat::readRowStart()
void JSONObjectEachRowInputFormat::readRowStart(MutableColumns & columns)
{
JSONUtils::readFieldName(*in);
auto object_name = JSONUtils::readFieldName(*in);
if (field_index_for_object_name)
{
columns[*field_index_for_object_name]->insertData(object_name.data(), object_name.size());
seen_columns[*field_index_for_object_name] = true;
read_columns[*field_index_for_object_name] = true;
}
}
bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row)
@ -30,7 +63,6 @@ bool JSONObjectEachRowInputFormat::checkEndOfData(bool is_first_row)
return false;
}
JSONObjectEachRowSchemaReader::JSONObjectEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: IRowWithNamesSchemaReader(in_, format_settings_)
{
@ -53,7 +85,10 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes(
JSONUtils::skipComma(in);
JSONUtils::readFieldName(in);
return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false);
auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false);
if (!format_settings.json_object_each_row.column_for_object_name.empty())
names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared<DataTypeString>());
return names_and_types;
}
void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
@ -83,7 +118,8 @@ void registerJSONObjectEachRowSchemaReader(FormatFactory & factory)
});
factory.registerAdditionalInfoForSchemaCacheGetter("JSONObjectEachRow", [](const FormatSettings & settings)
{
return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON);
return getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::JSON)
+ fmt::format(", format_json_object_each_row_column_for_object_name={}", settings.json_object_each_row.column_for_object_name);
});
}

View File

@ -27,8 +27,10 @@ public:
private:
void readPrefix() override;
void readSuffix() override {}
void readRowStart() override;
void readRowStart(MutableColumns & columns) override;
bool checkEndOfData(bool is_first_row) override;
std::optional<size_t> field_index_for_object_name;
};
@ -44,4 +46,6 @@ private:
bool first_row = true;
};
std::optional<size_t> getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings);
}

View File

@ -1,4 +1,5 @@
#include <Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h>
#include <Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h>
#include <Formats/JSONUtils.h>
#include <IO/WriteHelpers.h>
@ -6,10 +7,38 @@ namespace DB
{
JSONObjectEachRowRowOutputFormat::JSONObjectEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_)
: JSONEachRowRowOutputFormat(out_, header_, params_, settings_)
: JSONEachRowRowOutputFormat(out_, header_, params_, settings_), field_index_for_object_name(getColumnIndexForJSONObjectEachRowObjectName(header_, settings_))
{
}
void JSONObjectEachRowRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row)
{
if (field_number == field_index_for_object_name)
{
++field_number;
return;
}
JSONEachRowRowOutputFormat::writeField(column, serialization, row);
}
void JSONObjectEachRowRowOutputFormat::write(const Columns & columns, size_t row)
{
if (field_index_for_object_name)
object_name = columns[*field_index_for_object_name]->getDataAt(row).toString();
else
object_name = "row_" + std::to_string(row + 1);
IRowOutputFormat::write(columns, row);
}
void JSONObjectEachRowRowOutputFormat::writeFieldDelimiter()
{
/// We should not write comma before column that is used for
/// object name and also after it if it's in the first place
if (field_number != field_index_for_object_name && !(field_index_for_object_name == 0 && field_number == 1))
JSONEachRowRowOutputFormat::writeFieldDelimiter();
}
void JSONObjectEachRowRowOutputFormat::writePrefix()
{
JSONUtils::writeObjectStart(*ostr);
@ -17,9 +46,7 @@ void JSONObjectEachRowRowOutputFormat::writePrefix()
void JSONObjectEachRowRowOutputFormat::writeRowStartDelimiter()
{
++row_num;
String title = "row_" + std::to_string(row_num);
JSONUtils::writeCompactObjectStart(*ostr, 1, title.c_str());
JSONUtils::writeCompactObjectStart(*ostr, 1, object_name.c_str());
}
void JSONObjectEachRowRowOutputFormat::writeRowEndDelimiter()
@ -52,6 +79,7 @@ void registerOutputFormatJSONObjectEachRow(FormatFactory & factory)
return std::make_shared<JSONObjectEachRowRowOutputFormat>(buf, sample, params, settings);
});
factory.markOutputFormatSupportsParallelFormatting("JSONObjectEachRow");
factory.markFormatHasNoAppendSupport("JSONObjectEachRow");
}
}

View File

@ -29,6 +29,9 @@ public:
String getName() const override { return "JSONObjectEachRowRowOutputFormat"; }
private:
void write(const Columns & columns, size_t row) override;
void writeField(const IColumn & column, const ISerialization & serialization, size_t row) override;
void writeFieldDelimiter() override;
void writeRowStartDelimiter() override;
void writeRowEndDelimiter() override;
void writeRowBetweenDelimiter() override;
@ -36,7 +39,8 @@ private:
void writePrefix() override;
void writeSuffix() override;
size_t row_num = 0;
std::optional<size_t> field_index_for_object_name;
String object_name;
};
}

View File

@ -5683,7 +5683,8 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
{
const auto & metadata_snapshot = storage_snapshot->metadata;
const auto & settings = query_context->getSettingsRef();
if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query)
if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query
|| settings.aggregate_functions_null_for_empty /* projections don't work correctly with this setting */)
return std::nullopt;
// Currently projections don't support parallel replicas reading yet.

View File

@ -6,18 +6,9 @@
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <Interpreters/UserDefinedSQLFunctionFactory.h>
#include <Interpreters/UserDefinedExecutableFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedSQLFunctionFactory.h>
#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
#include <Storages/System/StorageSystemFunctions.h>
#include <Common/escapeForFileName.h>
#include <Backups/BackupEntriesCollector.h>
#include <Backups/BackupEntryFromMemory.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/IBackup.h>
#include <Parsers/ParserCreateFunctionQuery.h>
#include <Parsers/parseQuery.h>
namespace fs = std::filesystem;
namespace DB
@ -30,11 +21,6 @@ enum class FunctionOrigin : Int8
EXECUTABLE_USER_DEFINED = 2
};
namespace ErrorCodes
{
extern const int CANNOT_RESTORE_TABLE;
}
namespace
{
template <typename Factory>
@ -134,63 +120,12 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c
void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */)
{
const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance();
const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames();
fs::path data_path_in_backup_fs{data_path_in_backup};
for (const auto & function_name : user_defined_sql_functions_names)
{
auto ast = user_defined_sql_functions_factory.tryGet(function_name);
if (!ast)
continue;
backup_entries_collector.addBackupEntry(
data_path_in_backup_fs / (escapeForFileName(function_name) + ".sql"),
std::make_shared<BackupEntryFromMemory>(queryToString(ast)));
}
UserDefinedSQLFunctionFactory::instance().backup(backup_entries_collector, data_path_in_backup);
}
void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional<ASTs> & /* partitions */)
{
auto backup = restorer.getBackup();
fs::path data_path_in_backup_fs{data_path_in_backup};
Strings filenames = backup->listFiles(data_path_in_backup);
for (const auto & filename : filenames)
{
if (!filename.ends_with(".sql"))
{
throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't have the extension .sql",
getStorageID().getFullTableName(), String{data_path_in_backup_fs / filename});
}
}
auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance();
const auto & restore_settings = restorer.getRestoreSettings();
auto context = restorer.getContext();
for (const auto & filename : filenames)
{
String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql"));
String function_name = unescapeForFileName(escaped_function_name);
String filepath = data_path_in_backup_fs / filename;
auto function_def_entry = backup->readFile(filepath);
auto function_def_in = function_def_entry->getReadBuffer();
String function_def;
readStringUntilEOF(function_def, *function_def_in);
ParserCreateFunctionQuery parser;
ASTPtr ast = parseQuery(
parser,
function_def.data(),
function_def.data() + function_def.size(),
"in file " + filepath + " from backup " + backup->getName(),
0,
context->getSettingsRef().max_parser_depth);
bool replace = (restore_settings.create_function == RestoreUDFCreationMode::kReplace);
bool if_not_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreateIfNotExists);
user_defined_sql_functions_factory.registerFunction(context, function_name, ast, replace, if_not_exists, true);
}
UserDefinedSQLFunctionFactory::instance().restore(restorer, data_path_in_backup);
}
}

View File

@ -73,4 +73,4 @@ def test_replicated_merge_tree_defaults_compatibility(started_cluster):
node2.restart_with_latest_version()
node1.query(create_query.format(replica=1))
node1.query("EXISTS TABLE test.table") == "1\n"
assert node1.query("EXISTS TABLE test.table") == "1\n"

View File

@ -166,3 +166,63 @@
2005-01-01
2004-01-01
2003-01-01
2216-09-23
2216-10-13
2216-11-02
2216-11-22
2216-12-12
2217-01-01
2217-01-21
2217-02-10
2217-03-02
2217-03-22
2217-04-11
2217-03-22
2217-03-02
2217-02-10
2217-01-21
2217-01-01
2216-12-12
2216-11-22
2216-11-02
2216-10-13
2215-05-01
2215-09-01
2216-01-01
2216-05-01
2216-09-01
2217-01-01
2217-05-01
2217-09-01
2218-01-01
2218-05-01
2218-09-01
2218-05-01
2218-01-01
2217-09-01
2217-05-01
2217-01-01
2216-09-01
2216-05-01
2216-01-01
2215-09-01
2197-01-01
2201-01-01
2205-01-01
2209-01-01
2213-01-01
2217-01-01
2221-01-01
2225-01-01
2229-01-01
2233-01-01
2237-01-01
2233-01-01
2229-01-01
2225-01-01
2221-01-01
2217-01-01
2213-01-01
2209-01-01
2205-01-01
2201-01-01

View File

@ -53,8 +53,18 @@ SELECT toDate('2017-01-01') - INTERVAL 1 YEAR AS x;
SELECT toDate('2017-01-01') - INTERVAL -1 YEAR AS x;
SELECT toDate('2017-01-01') + INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30;
SELECT INTERVAL number - 15 MONTH + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30;
SELECT toDate('2017-01-01') - INTERVAL number - 15 MONTH AS x FROM system.numbers LIMIT 30;
SELECT toDate('2017-01-01') + INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30;
SELECT INTERVAL number - 15 YEAR + toDate('2017-01-01') AS x FROM system.numbers LIMIT 30;
SELECT toDate('2017-01-01') - INTERVAL number - 15 YEAR AS x FROM system.numbers LIMIT 30;
SELECT toDate32('2217-01-01') + INTERVAL number * 20 - 100 DAY AS x FROM system.numbers LIMIT 10;
SELECT INTERVAL 100 - number * 20 DAY + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10;
SELECT INTERVAL number * 4 - 20 MONTH + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10;
SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 MONTH AS x FROM system.numbers LIMIT 10;
SELECT INTERVAL number * 4 - 20 YEAR + toDate32('2217-01-01') AS x FROM system.numbers LIMIT 10;
SELECT toDate32('2217-01-01') - INTERVAL number * 4 - 20 YEAR AS x FROM system.numbers LIMIT 10;

View File

@ -1,5 +1,5 @@
=DICTIONARY in Ordinary DB
CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE DICTIONARY db_01018.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
dict1
1
db_01018 dict1
@ -12,7 +12,7 @@ db_01018 dict1
==DROP DICTIONARY
0
=DICTIONARY in Memory DB
CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE DICTIONARY memory_db.dict2\n(\n `key_column` UInt64 DEFAULT 0 INJECTIVE,\n `second_column` UInt8 DEFAULT 1 EXPRESSION rand() % 222,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict_01018\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
dict2
1
memory_db dict2

View File

@ -1,3 +1,3 @@
World
CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED)
CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED)
Hello

View File

@ -1,5 +1,5 @@
CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE DICTIONARY test_01190.dict\n(\n `key` UInt64 DEFAULT 0,\n `col` UInt8 DEFAULT 1\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'test_01190\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT())
CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log
CREATE TABLE test_01190.log\n(\n `s` String\n)\nENGINE = Log
test

View File

@ -14,7 +14,7 @@ renamed
10 45
10 45
ok
CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT())
CREATE DICTIONARY test_01192_atomic.dict UUID \'00001192-0000-4000-8000-000000000002\'\n(\n `n` UInt64,\n `_part` String DEFAULT \'no\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'mt\' DB \'test_01192\'))\nLAYOUT(DIRECT())
test_01192_atomic dict NOT_LOADED 00001192-0000-4000-8000-000000000002
no
ok

View File

@ -6,7 +6,7 @@ CREATE DICTIONARY dict_db_01224.dict
`val` UInt64 DEFAULT 10
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224'))
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01224'))
LIFETIME(MIN 0 MAX 0)
LAYOUT(FLAT())
NOT_LOADED
@ -17,7 +17,7 @@ CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict`
)
ENGINE = Dictionary(`dict_db_01224.dict`)
NOT_LOADED
Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT())
Dictionary 1 CREATE DICTIONARY dict_db_01224.dict (`key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10) PRIMARY KEY key SOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'dict_data\' PASSWORD \'\' DB \'dict_db_01224\')) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT())
NOT_LOADED
key UInt64
val UInt64

View File

@ -1,5 +1,5 @@
CREATE DICTIONARY default.dict_01509\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0))
CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1))
CREATE DICTIONARY default.dict_01509\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 0))
CREATE DICTIONARY default.dict_01509_preallocate\n(\n `key` UInt64,\n `value` String DEFAULT \'-\'\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'data_01509\'))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(PREALLOCATE 1))
HashedDictionary: Preallocated 10000 elements
-
0

View File

@ -47,5 +47,5 @@ SELECT field2 FROM agg_view01747 WHERE field1 = 'test';
drop table summing_table01747;
drop view rates01747;
drop view agg_view01747;
drop table dictst01747;
drop DICTIONARY default.dict01747;
drop table dictst01747;

View File

@ -1,2 +1,2 @@
CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT tcpPort() TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\'
CREATE DICTIONARY default.`2024_dictionary_with_comment`\n(\n `id` UInt64,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 TABLE \'source_table\'))\nLIFETIME(MIN 0 MAX 1000)\nLAYOUT(FLAT())\nCOMMENT \'Test dictionary with comment\'
Test dictionary with comment

View File

@ -0,0 +1,21 @@
0
1
1
0
0
1
1
0
0
1
1
0
0
1
1
0
1
1
1
1
1

View File

@ -0,0 +1,32 @@
SELECT toDecimal32(1.555,3) IN (1.5551);
SELECT toDecimal32(1.555,3) IN (1.5551,1.555);
SELECT toDecimal32(1.555,3) IN (1.5551,1.555000);
SELECT toDecimal32(1.555,3) IN (1.550,1.5);
SELECT toDecimal64(1.555,3) IN (1.5551);
SELECT toDecimal64(1.555,3) IN (1.5551,1.555);
SELECT toDecimal64(1.555,3) IN (1.5551,1.555000);
SELECT toDecimal64(1.555,3) IN (1.550,1.5);
SELECT toDecimal128(1.555,3) IN (1.5551);
SELECT toDecimal128(1.555,3) IN (1.5551,1.555);
SELECT toDecimal128(1.555,3) IN (1.5551,1.555000);
SELECT toDecimal128(1.555,3) IN (1.550,1.5);
SELECT toDecimal256(1.555,3) IN (1.5551);
SELECT toDecimal256(1.555,3) IN (1.5551,1.555);
SELECT toDecimal256(1.555,3) IN (1.5551,1.555000);
SELECT toDecimal256(1.555,3) IN (1.550,1.5);
DROP TABLE IF EXISTS decimal_in_float_test;
CREATE TABLE decimal_in_float_test ( `a` Decimal(18, 0), `b` Decimal(36, 2) ) ENGINE = Memory;
INSERT INTO decimal_in_float_test VALUES ('33', '44.44');
SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33);
SELECT count() == 1 FROM decimal_in_float_test WHERE a IN (33.0);
SELECT count() == 1 FROM decimal_in_float_test WHERE a NOT IN (33.333);
SELECT count() == 1 FROM decimal_in_float_test WHERE b IN (44.44);
SELECT count() == 1 FROM decimal_in_float_test WHERE b NOT IN (44.4,44.444);
DROP TABLE IF EXISTS decimal_in_float_test;

View File

@ -0,0 +1,6 @@
CREATE DICTIONARY default.dict\n(\n `id` UInt32,\n `value` String\n)\nPRIMARY KEY id\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' DB \'default\' TABLE \'view\'))\nLIFETIME(MIN 0 MAX 600)\nLAYOUT(HASHED())
CREATE TABLE default.table\n(\n `col` String MATERIALIZED dictGet(\'default.dict\', \'value\', toUInt32(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
1 v
1 v
1 v
2 a

View File

@ -0,0 +1,40 @@
DROP TABLE IF EXISTS table;
DROP DICTIONARY IF EXISTS dict;
DROP TABLE IF EXISTS view;
CREATE TABLE view (id UInt32, value String) ENGINE=ReplicatedMergeTree('/test/2449/{database}', '1') ORDER BY id;
INSERT INTO view VALUES (1, 'v');
CREATE DICTIONARY dict (id UInt32, value String)
PRIMARY KEY id
SOURCE(CLICKHOUSE(host 'localhost' port tcpPort() user 'default' db currentDatabase() table 'view'))
LAYOUT (HASHED()) LIFETIME (600);
SHOW CREATE dict;
CREATE TABLE table
(
col MATERIALIZED dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))
)
ENGINE = MergeTree()
ORDER BY tuple();
SHOW CREATE TABLE table;
SELECT * FROM dictionary('dict');
DROP TABLE view; -- {serverError HAVE_DEPENDENT_OBJECTS}
-- check that table is not readonly
INSERT INTO view VALUES (2, 'a');
DROP DICTIONARY dict; -- {serverError HAVE_DEPENDENT_OBJECTS}
-- check that dictionary was not detached
SELECT * FROM dictionary('dict');
SYSTEM RELOAD DICTIONARY dict;
SELECT * FROM dictionary('dict') ORDER BY id;
DROP TABLE table;
DROP DICTIONARY dict;
DROP TABLE view;

View File

@ -0,0 +1,20 @@
{
"name_0": {"number":"0"},
"name_1": {"number":"1"},
"name_2": {"number":"2"}
}
{
"name_0": {"number":"0","x":"1"},
"name_1": {"number":"1","x":"2"},
"name_2": {"number":"2","x":"3"}
}
{
"name_0": {"number":"0"},
"name_1": {"number":"1"},
"name_2": {"number":"2"}
}
name String
number Nullable(Int64)
name_0 0
name_1 1
name_2 2

View File

@ -0,0 +1,11 @@
-- Tags: no-fasttest, no-parallel
set format_json_object_each_row_column_for_object_name='name';
select number, concat('name_', toString(number)) as name from numbers(3) format JSONObjectEachRow;
select number, concat('name_', toString(number)) as name, number + 1 as x from numbers(3) format JSONObjectEachRow;
select concat('name_', toString(number)) as name, number from numbers(3) format JSONObjectEachRow;
insert into function file(02454_data.jsonobjecteachrow) select number, concat('name_', toString(number)) as name from numbers(3) settings engine_file_truncate_on_insert=1;
desc file(02454_data.jsonobjecteachrow);
select * from file(02454_data.jsonobjecteachrow);

View File

@ -0,0 +1,3 @@
2019-05-01 test
2019-05-01 test
2019-05-01 test

View File

@ -0,0 +1,36 @@
DROP TABLE IF EXISTS session;
DROP TABLE IF EXISTS queue;
DROP TABLE IF EXISTS forward;
CREATE TABLE session
(
`day` Date,
`uid` String,
`dummy` String DEFAULT ''
)
ENGINE = MergeTree
ORDER BY (day, uid);
CREATE TABLE queue
(
`day` Date,
`uid` String
)
ENGINE = MergeTree
ORDER BY (day, uid);
CREATE MATERIALIZED VIEW IF NOT EXISTS forward TO session AS
SELECT
day,
uid
FROM queue;
insert into queue values ('2019-05-01', 'test');
SELECT * FROM queue;
SELECT * FROM session;
SELECT * FROM forward;
DROP TABLE session;
DROP TABLE queue;
DROP TABLE forward;

View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Tag no-fasttest: depends on bzip2
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
${CLICKHOUSE_LOCAL} --aggregate_functions_null_for_empty=1 --multiquery --query "create table test_date (date Int32) ENGINE = MergeTree ORDER BY (date) as select 20220920; SELECT max(date) FROM test_date";