Merge branch 'master' of github.com:ClickHouse/ClickHouse into pipe_reading

This commit is contained in:
BoloniniD 2021-07-06 14:43:55 +03:00
commit cb975e6d4a
241 changed files with 12912 additions and 1144 deletions

View File

@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021.

View File

@ -0,0 +1,41 @@
#include <functional>
/** Adapt a functor to a static method where the functor is passed as an opaque context pointer.
  * Main use case is to convert a lambda into a function that can be passed into JIT-compiled code.
  */
template <typename Functor>
class FunctorToStaticMethodAdaptor : public FunctorToStaticMethodAdaptor<decltype(&Functor::operator())>
{
};

/// Specialization for const call operators (e.g. non-mutable lambdas).
template <typename R, typename C, typename ...Args>
class FunctorToStaticMethodAdaptor<R (C::*)(Args...) const>
{
public:
    /// Invoke the functor through a correctly typed context pointer.
    static R call(C * ptr, Args &&... arguments)
    {
        return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...);
    }

    /// Invoke the functor through an untyped (char *) context pointer, as received from JIT code.
    /// Caller must guarantee that `ptr` actually points to an object of type C.
    static R unsafeCall(char * ptr, Args &&... arguments)
    {
        C * ptr_typed = reinterpret_cast<C *>(ptr);
        return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...);
    }
};

/// Specialization for non-const call operators (e.g. mutable lambdas).
template <typename R, typename C, typename ...Args>
class FunctorToStaticMethodAdaptor<R (C::*)(Args...)>
{
public:
    /// Invoke the functor through a correctly typed context pointer.
    static R call(C * ptr, Args &&... arguments)
    {
        return std::invoke(&C::operator(), ptr, std::forward<Args>(arguments)...);
    }

    /// Invoke the functor through an untyped (char *) context pointer, as received from JIT code.
    /// Caller must guarantee that `ptr` actually points to an object of type C.
    /// Fixed: was `static_cast`, which is ill-formed for char * -> C * (unrelated pointer types)
    /// and failed to compile on instantiation; `reinterpret_cast` matches the const specialization.
    static R unsafeCall(char * ptr, Args &&... arguments)
    {
        C * ptr_typed = reinterpret_cast<C *>(ptr);
        return std::invoke(&C::operator(), ptr_typed, std::forward<Args>(arguments)...);
    }
};

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit cdcc3d8c6f6e80a0886082704a0902d61d8d3ffe
Subproject commit 6b816d2fba3991f8fd6aaec17d92f68947eab667

View File

@ -373,12 +373,6 @@ function run_tests
# Depends on AWS
01801_s3_cluster
# Depends on LLVM JIT
01072_nullable_jit
01852_jit_if
01865_jit_comparison_constant_result
01871_merge_tree_compile_expressions
# needs psql
01889_postgresql_protocol_null_fields

View File

@ -11,6 +11,7 @@ services:
interval: 10s
timeout: 5s
retries: 5
command: [ "postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=2"]
networks:
default:
aliases:
@ -22,4 +23,4 @@ services:
volumes:
- type: ${POSTGRES_LOGS_FS:-tmpfs}
source: ${POSTGRES_DIR:-}
target: /postgres/
target: /postgres/

View File

@ -23,6 +23,7 @@
<!-- disable jit for perf tests -->
<compile_expressions>0</compile_expressions>
<compile_aggregate_expressions>0</compile_aggregate_expressions>
</default>
</profiles>
<users>

View File

@ -7,13 +7,13 @@ toc_title: Third-Party Libraries Used
The list of third-party libraries can be obtained by the following query:
```
``` sql
SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'
```
[Example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==)
| library_name | license_type | license_path |
| library_name | license_type | license_path |
|:-|:-|:-|
| abseil-cpp | Apache | /contrib/abseil-cpp/LICENSE |
| AMQP-CPP | Apache | /contrib/AMQP-CPP/LICENSE |
@ -89,3 +89,15 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li
| xz | Public Domain | /contrib/xz/COPYING |
| zlib-ng | zLib | /contrib/zlib-ng/LICENSE.md |
| zstd | BSD | /contrib/zstd/LICENSE |
## Guidelines for adding new third-party libraries and maintaining custom changes in them {#adding-third-party-libraries}
1. All external third-party code should reside in the dedicated directories under `contrib` directory of ClickHouse repo. Prefer Git submodules, when available.
2. Fork/mirror the official repo in [Clickhouse-extras](https://github.com/ClickHouse-Extras). Prefer official GitHub repos, when available.
3. Branch from the branch you want to integrate, e.g., `master` -> `clickhouse/master`, or `release/vX.Y.Z` -> `clickhouse/release/vX.Y.Z`.
4. All forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras) can be automatically synchronized with upstreams. `clickhouse/...` branches will remain unaffected, since virtually nobody is going to use that naming pattern in their upstream repos.
5. Add submodules under `contrib` of ClickHouse repo that refer the above forks/mirrors. Set the submodules to track the corresponding `clickhouse/...` branches.
6. Every time the custom changes have to be made in the library code, a dedicated branch should be created, like `clickhouse/my-fix`. Then this branch should be merged into the branch, that is tracked by the submodule, e.g., `clickhouse/master` or `clickhouse/release/vX.Y.Z`.
7. No code should be pushed in any branch of the forks in [Clickhouse-extras](https://github.com/ClickHouse-Extras), whose names do not follow `clickhouse/...` pattern.
8. Always write the custom changes with the official repo in mind. Once the PR is merged from (a feature/fix branch in) your personal fork into the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras), and the submodule is bumped in ClickHouse repo, consider opening another PR from (a feature/fix branch in) the fork in [Clickhouse-extras](https://github.com/ClickHouse-Extras) to the official repo of the library. This will make sure, that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers.
9. When a submodule needs to start using a newer code from the original branch (e.g., `master`), and since the custom changes might be merged in the branch it is tracking (e.g., `clickhouse/master`) and so it may diverge from its original counterpart (i.e., `master`), a careful merge should be carried out first, i.e., `master` -> `clickhouse/master`, and only then the submodule can be bumped in ClickHouse.

View File

@ -237,6 +237,8 @@ The description of ClickHouse architecture can be found here: https://clickhouse
The Code Style Guide: https://clickhouse.tech/docs/en/development/style/
Adding third-party libraries: https://clickhouse.tech/docs/en/development/contrib/#adding-third-party-libraries
Writing tests: https://clickhouse.tech/docs/en/development/tests/
List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3A%22easy+task%22

View File

@ -628,7 +628,7 @@ If the class is not intended for polymorphic use, you do not need to make functi
**18.** Encodings.
Use UTF-8 everywhere. Use `std::string`and`char *`. Do not use `std::wstring`and`wchar_t`.
Use UTF-8 everywhere. Use `std::string` and `char *`. Do not use `std::wstring` and `wchar_t`.
**19.** Logging.
@ -749,17 +749,9 @@ If your code in the `master` branch is not buildable yet, exclude it from the bu
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** If necessary, you can use any well-known libraries available in the OS package.
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse.
If there is a good solution already available, then use it, even if it means you have to install another library.
(But be prepared to remove bad libraries from code.)
**3.** You can install a library that isnt in the packages, if the packages do not have what you need or have an outdated version or the wrong type of compilation.
**4.** If the library is small and does not have its own complex build system, put the source files in the `contrib` folder.
**5.** Preference is always given to libraries that are already in use.
**3.** Preference is always given to libraries that are already in use.
## General Recommendations {#general-recommendations-1}

View File

@ -0,0 +1,71 @@
---
toc_priority: 30
toc_title: MaterializedPostgreSQL
---
# MaterializedPostgreSQL {#materialize-postgresql}
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SELECT * FROM test_database.postgres_table;
```
## Settings {#settings}
1. `materialized_postgresql_max_block_size` - Number of rows collected before flushing data into table. Default: `65536`.
2. `materialized_postgresql_tables_list` - List of tables for MaterializedPostgreSQL database engine. Default: `whole database`.
3. `materialized_postgresql_allow_automatic_update` - Allow to reload table in the background, when schema changes are detected. Default: `0` (`false`).
``` sql
CREATE DATABASE test_database
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_max_block_size = 65536,
materialized_postgresql_tables_list = 'table1,table2,table3';
SELECT * FROM test_database.table1;
```
## Requirements {#requirements}
- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the postgresql config file.
- Each replicated table must have one of the following **replica identity**:
1. **default** (primary key)
2. **index**
``` bash
postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL);
postgres# CREATE unique INDEX postgres_table_index on postgres_table(a, c, e);
postgres# ALTER TABLE postgres_table REPLICA IDENTITY USING INDEX postgres_table_index;
```
Primary key is always checked first. If it is absent, then index, defined as replica identity index, is checked.
If index is used as replica identity, there has to be only one such index in a table.
You can check what type is used for a specific table with the following command:
``` bash
postgres# SELECT CASE relreplident
WHEN 'd' THEN 'default'
WHEN 'n' THEN 'nothing'
WHEN 'f' THEN 'full'
WHEN 'i' THEN 'index'
END AS replica_identity
FROM pg_class
WHERE oid = 'postgres_table'::regclass;
```
## Warning {#warning}
1. **TOAST** values conversion is not supported. Default value for the data type will be used.

View File

@ -0,0 +1,46 @@
---
toc_priority: 12
toc_title: MaterializedPostgreSQL
---
# MaterializedPostgreSQL {#materialize-postgresql}
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```
## Requirements {#requirements}
- Setting `wal_level` to `logical` and `max_replication_slots` to at least `2` in the postgresql config file.
- A table with engine `MaterializedPostgreSQL` must have a primary key - the same as a replica identity index (default: primary key) of a postgres table (See [details on replica identity index](../../database-engines/materialized-postgresql.md#requirements)).
- Only database `Atomic` is allowed.
## Virtual columns {#creating-a-table}
- `_version` (`UInt64`)
- `_sign` (`Int8`)
These columns do not need to be added, when table is created. They are always accessible in `SELECT` query.
`_version` column equals `LSN` position in `WAL`, so it might be used to check how up-to-date replication is.
``` sql
CREATE TABLE test.postgresql_replica (key UInt64, value UInt64)
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
SELECT key, value, _version FROM test.postgresql_replica;
```
## Warning {#warning}
1. **TOAST** values conversion is not supported. Default value for the data type will be used.

View File

@ -1,3 +1,7 @@
---
toc_priority: 212
---
# median {#median}
The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample.
@ -12,6 +16,7 @@ Functions:
- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted).
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
**Example**

View File

@ -0,0 +1,64 @@
---
toc_priority: 209
---
# quantileBFloat16 {#quantilebfloat16}
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample consisting of [bfloat16](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) numbers. `bfloat16` is a floating-point data type with 1 sign bit, 8 exponent bits and 7 fraction bits.
The function converts input values to 32-bit floats and takes the most significant 16 bits. Then it calculates `bfloat16` quantile value and converts the result to a 64-bit float by appending zero bits.
The function is a fast quantile estimator with a relative error no more than 0.390625%.
**Syntax**
``` sql
quantileBFloat16[(level)](expr)
```
Alias: `medianBFloat16`
**Arguments**
- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
**Parameters**
- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Approximate quantile of the specified level.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Example**
Input table has an integer and a float columns:
``` text
┌─a─┬─────b─┐
│ 1 │ 1.001 │
│ 2 │ 1.002 │
│ 3 │ 1.003 │
│ 4 │ 1.004 │
└───┴───────┘
```
Query to calculate 0.75-quantile (third quartile):
``` sql
SELECT quantileBFloat16(0.75)(a), quantileBFloat16(0.75)(b) FROM example_table;
```
Result:
``` text
┌─quantileBFloat16(0.75)(a)─┬─quantileBFloat16(0.75)(b)─┐
│ 3 │ 1 │
└───────────────────────────┴───────────────────────────┘
```
Note that all floating point values in the example are truncated to 1.0 when converting to `bfloat16`.
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -74,7 +74,7 @@ When using multiple `quantile*` functions with different levels in a query, the
**Syntax**
``` sql
quantileExact(level)(expr)
quantileExactLow(level)(expr)
```
Alias: `medianExactLow`.

View File

@ -8,7 +8,7 @@ toc_priority: 201
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive {#quantilesexactexclusive}
@ -18,7 +18,7 @@ To get exact value, all the passed values are combined into an array, whic
This function is equivalent to [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba) Excel function, ([type R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
Works more efficiently with sets of levels than [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
Works more efficiently with sets of levels than [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
**Syntax**
@ -70,7 +70,7 @@ To get exact value, all the passed values are combined into an array, whic
This function is equivalent to [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed) Excel function, ([type R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample)).
Works more efficiently with sets of levels than [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantilesexactinclusive).
Works more efficiently with sets of levels than [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
**Syntax**

View File

@ -80,6 +80,7 @@ SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc,
toInt32(time_samoa) AS int32samoa
FORMAT Vertical;
```
Result:
```text
@ -1014,7 +1015,7 @@ Result:
## dateName {#dataname}
Returns part of date with specified date part.
Returns specified part of date.
**Syntax**
@ -1024,13 +1025,13 @@ dateName(date_part, date)
**Arguments**
- `date_part` - Date part. Possible values .
- `date` — Date [Date](../../sql-reference/data-types/date.md) or DateTime [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md).
- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Specified date part of date.
- The specified part of date.
Type: [String](../../sql-reference/data-types/string.md#string)

View File

@ -224,7 +224,7 @@ Accepts an integer. Returns an array of UInt64 numbers containing the list of po
## bitPositionsToArray(num) {#bitpositionstoarraynum}
Accepts an integer, argument will be converted to unsigned integer type. Returns an array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order.
Accepts an integer and converts it to an unsigned integer. Returns an array of `UInt64` numbers containing the list of positions of bits of `arg` that equal `1`, in ascending order.
**Syntax**
@ -234,11 +234,13 @@ bitPositionsToArray(arg)
**Arguments**
- `arg` — Integer value.Types: [Int/UInt](../../sql-reference/data-types/int-uint.md)
- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md).
**Returned value**
An array of UInt64 numbers containing the list of positions of bits that equals 1. Numbers in the array are in ascending order.
- An array containing a list of positions of bits that equal `1`, in ascending order.
Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**

View File

@ -749,19 +749,11 @@ CPU命令セットは、サーバー間でサポートされる最小のセッ
## 図書館 {#libraries}
**1.** C++20標準ライブラリが使用されています実験的な拡張が許可されています`boost``Poco` フレームワーク
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** 必要に応じて、OSパッケージで利用可能な既知のライブラリを使用できます。
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse.
すでに利用可能な良い解決策がある場合は、別のライブラリをインストールする必要がある場合でも、それを使用してください。
(が準備をしておいてくださ去の悪い図書館からのコードです。)
**3.** パッケージに必要なものがない場合や、古いバージョンや間違った種類のコンパイルがある場合は、パッケージにないライブラリをインストールできます。
**4.** ライブラリが小さく、独自の複雑なビルドシステムがない場合は、ソースファイルを `contrib` フォルダ。
**5.** すでに使用されているライブラリが優先されます。
**3.** Preference is always given to libraries that are already in use.
## 一般的な推奨事項 {#general-recommendations-1}

View File

@ -824,17 +824,9 @@ The dictionary is configured incorrectly.
**1.** Используются стандартная библиотека C++20 (допустимо использовать экспериментальные расширения) а также фреймворки `boost`, `Poco`.
**2.** При необходимости, можно использовать любые известные библиотеки, доступные в ОС из пакетов.
**2.** Библиотеки должны быть расположены в виде исходников в директории `contrib` и собираться вместе с ClickHouse. Не разрешено использовать библиотеки, доступные в пакетах ОС или любые другие способы установки библиотек в систему.
Если есть хорошее готовое решение, то оно используется, даже если для этого придётся установить ещё одну библиотеку.
(Но будьте готовы к тому, что иногда вам придётся выкидывать плохие библиотеки из кода.)
**3.** Если в пакетах нет нужной библиотеки, или её версия достаточно старая, или если она собрана не так, как нужно, то можно использовать библиотеку, устанавливаемую не из пакетов.
**4.** Если библиотека достаточно маленькая и у неё нет своей системы сборки, то следует включить её файлы в проект, в директорию `contrib`.
**5.** Предпочтение всегда отдаётся уже использующимся библиотекам.
**3.** Предпочтение отдаётся уже использующимся библиотекам.
## Общее {#obshchee-1}

View File

@ -1,17 +1,19 @@
# median {#median}
Функции `median*`алиасы для соответствущих функций `quantile*`. Они вычисляют медиану числовой последовательности.
Функции `median*`синонимы для соответствущих функций `quantile*`. Они вычисляют медиану числовой последовательности.
Functions:
Функции:
- `median` — алиас [quantile](#quantile).
- `medianDeterministic` — алиас [quantileDeterministic](#quantiledeterministic).
- `medianExact` — алиас [quantileExact](#quantileexact).
- `medianExactWeighted` — алиас [quantileExactWeighted](#quantileexactweighted).
- `medianTiming` — алиас [quantileTiming](#quantiletiming).
- `medianTimingWeighted` — алиас [quantileTimingWeighted](#quantiletimingweighted).
- `medianTDigest` — алиас [quantileTDigest](#quantiletdigest).
- `medianTDigestWeighted` — алиас [quantileTDigestWeighted](#quantiletdigestweighted).
- `median` — синоним для [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile).
- `medianDeterministic` — синоним для [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic).
- `medianExact` — синоним для [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact).
- `medianExactWeighted` — синоним для [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted).
- `medianTiming` — синоним для [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming).
- `medianTimingWeighted` — синоним для [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted).
- `medianTDigest` — синоним для [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — синоним для [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — синоним для [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
**Пример**

View File

@ -0,0 +1,64 @@
---
toc_priority: 209
---
# quantileBFloat16 {#quantilebfloat16}
Приближенно вычисляет [квантиль](https://ru.wikipedia.org/wiki/Квантиль) выборки чисел в формате [bfloat16](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). `bfloat16` — это формат с плавающей точкой, в котором для представления числа используется 1 знаковый бит, 8 бит для порядка и 7 бит для мантиссы.
Функция преобразует входное число в 32-битное с плавающей точкой и обрабатывает его старшие 16 бит. Она вычисляет квантиль в формате `bfloat16` и преобразует его в 64-битное число с плавающей точкой, добавляя нулевые биты.
Эта функция выполняет быстрые приближенные вычисления с относительной ошибкой не более 0.390625%.
**Синтаксис**
``` sql
quantileBFloat16[(level)](expr)
```
Синоним: `medianBFloat16`
**Аргументы**
- `expr` — столбец с числовыми данными. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
**Параметры**
- `level` — уровень квантиля. Необязательный параметр. Допустимый диапазон значений от 0 до 1. Значение по умолчанию: 0.5. [Float](../../../sql-reference/data-types/float.md).
**Возвращаемое значение**
- Приближенное значение квантиля.
Тип: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Пример**
В таблице есть столбцы с целыми числами и с числами с плавающей точкой:
``` text
┌─a─┬─────b─┐
│ 1 │ 1.001 │
│ 2 │ 1.002 │
│ 3 │ 1.003 │
│ 4 │ 1.004 │
└───┴───────┘
```
Запрос для вычисления 0.75-квантиля (верхнего квартиля):
``` sql
SELECT quantileBFloat16(0.75)(a), quantileBFloat16(0.75)(b) FROM example_table;
```
Результат:
``` text
┌─quantileBFloat16(0.75)(a)─┬─quantileBFloat16(0.75)(b)─┐
│ 3 │ 1 │
└───────────────────────────┴───────────────────────────┘
```
Обратите внимание, что все числа с плавающей точкой в примере были округлены до 1.0 при преобразовании к `bfloat16`.
**См. также**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)

View File

@ -8,7 +8,7 @@ toc_priority: 201
Синтаксис: `quantiles(level1, level2, …)(x)`
Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями.
Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями.
## quantilesExactExclusive {#quantilesexactexclusive}
@ -18,7 +18,7 @@ toc_priority: 201
Эта функция эквивалентна Excel функции [PERCENTILE.EXC](https://support.microsoft.com/en-us/office/percentile-exc-function-bbaa7204-e9e1-4010-85bf-c31dc5dce4ba), [тип R6](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
С наборами уровней работает эффективнее, чем [quantilesExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
С наборами уровней работает эффективнее, чем [quantileExactExclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactexclusive).
**Синтаксис**
@ -70,7 +70,7 @@ SELECT quantilesExactExclusive(0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999)(x) FROM
Эта функция эквивалентна Excel функции [PERCENTILE.INC](https://support.microsoft.com/en-us/office/percentile-inc-function-680f9539-45eb-410b-9a5e-c1355e5fe2ed), [тип R7](https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample).
С наборами уровней работает эффективнее, чем [quantilesExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantilesexactinclusive).
С наборами уровней работает эффективнее, чем [quantileExactInclusive](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactinclusive).
**Синтаксис**

View File

@ -27,40 +27,40 @@ SELECT
Возвращает часовой пояс сервера.
**Синтаксис**
**Синтаксис**
``` sql
timeZone()
```
Псевдоним: `timezone`.
Синоним: `timezone`.
**Возвращаемое значение**
- Часовой пояс.
- Часовой пояс.
Тип: [String](../../sql-reference/data-types/string.md).
## toTimeZone {#totimezone}
Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение.
Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение.
**Синтаксис**
**Синтаксис**
``` sql
toTimezone(value, timezone)
```
Псевдоним: `toTimezone`.
Синоним: `toTimezone`.
**Аргументы**
**Аргументы**
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Дата с временем.
- Дата с временем.
Тип: [DateTime](../../sql-reference/data-types/datetime.md).
@ -80,6 +80,7 @@ SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc,
toInt32(time_samoa) AS int32samoa
FORMAT Vertical;
```
Результат:
```text
@ -102,21 +103,21 @@ int32samoa: 1546300800
Возвращает название часового пояса для значений типа [DateTime](../../sql-reference/data-types/datetime.md) и [DateTime64](../../sql-reference/data-types/datetime64.md).
**Синтаксис**
**Синтаксис**
``` sql
timeZoneOf(value)
```
Псевдоним: `timezoneOf`.
Синоним: `timezoneOf`.
**Аргументы**
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
**Возвращаемое значение**
- Название часового пояса.
- Название часового пояса.
Тип: [String](../../sql-reference/data-types/string.md).
@ -145,15 +146,15 @@ SELECT timezoneOf(now());
timeZoneOffset(value)
```
Псевдоним: `timezoneOffset`.
Синоним: `timezoneOffset`.
**Аргументы**
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Возвращаемое значение**
- Смещение в секундах от UTC.
- Смещение в секундах от UTC.
Тип: [Int32](../../sql-reference/data-types/int-uint.md).
@ -626,7 +627,7 @@ SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
Добавляет интервал времени или даты к указанной дате или дате со временем.
**Синтаксис**
**Синтаксис**
``` sql
date_add(unit, value, date)
@ -1025,6 +1026,45 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g');
└────────────────────────────────────────────┘
```
## dateName {#dataname}
Возвращает указанную часть даты.
**Синтаксис**
``` sql
dateName(date_part, date)
```
**Аргументы**
- `date_part` — часть даты. Возможные значения: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
- `date` — дата. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс. Необязательный аргумент. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Указанная часть даты.
Тип: [String](../../sql-reference/data-types/string.md#string).
**Пример**
Запрос:
```sql
WITH toDateTime('2021-04-14 11:22:33') AS date_value
SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value);
```
Результат:
```text
┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐
│ 2021 │ April │ 14 │
└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘
```
## FROM\_UNIXTIME {#fromunixtime}
Функция преобразует Unix timestamp в календарную дату и время.

View File

@ -223,3 +223,53 @@ SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num;
## bitmaskToArray(num) {#bitmasktoarraynum}
Принимает целое число. Возвращает массив чисел типа UInt64, содержащий степени двойки, в сумме дающих исходное число; числа в массиве идут по возрастанию.
## bitPositionsToArray(num) {#bitpositionstoarraynum}
Принимает целое число и приводит его к беззнаковому виду. Возвращает массив `UInt64` чисел, который содержит список позиций битов `arg`, равных `1`, в порядке возрастания.
**Синтаксис**
```sql
bitPositionsToArray(arg)
```
**Аргументы**
- `arg` — целое значение. [Int/UInt](../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
- Массив, содержащий список позиций битов, равных `1`, в порядке возрастания.
Тип: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)).
**Примеры**
Запрос:
``` sql
SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
```
Результат:
``` text
┌─bit_positions─┐
│ [0] │
└───────────────┘
```
Запрос:
``` sql
SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions;
```
Результат:
``` text
┌─bit_positions─────┐
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```

View File

@ -742,19 +742,11 @@ CPU指令集是我们服务器中支持的最小集合。 目前它是SSE 4.2
## 库 {#ku}
**1.** 使用C++20标准库允许实验性功能以及 `boost``Poco` 框架。
**1.** The C++20 standard library is used (experimental extensions are allowed), as well as `boost` and `Poco` frameworks.
**2.** 如有必要,您可以使用 OS 包中提供的任何已知库。
**2.** It is not allowed to use libraries from OS packages. It is also not allowed to use pre-installed libraries. All libraries should be placed in form of source code in `contrib` directory and built with ClickHouse.
如果有一个好的解决方案已经可用,那就使用它,即使这意味着你必须安装另一个库。
(但要准备从代码中删除不好的库)
**3.** 如果软件包没有您需要的软件包或者有过时的版本或错误的编译类型,则可以安装不在软件包中的库。
**4.** 如果库很小并且没有自己的复杂构建系统,请将源文件放在 `contrib` 文件夹中。
**5.** 始终优先考虑已经使用的库。
**3.** Preference is always given to libraries that are already in use.
## 一般建议 {#yi-ban-jian-yi-1}

View File

@ -50,7 +50,7 @@
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/InterserverCredentials.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Access/AccessControlManager.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/System/attachSystemTables.h>

View File

@ -9,6 +9,14 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Core/DecimalFunctions.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -85,13 +93,15 @@ struct AvgFraction
* @tparam Derived When deriving from this class, use the child class name as in CRTP, e.g.
* class Self : Agg<char, bool, bool, Self>.
*/
template <typename Numerator, typename Denominator, typename Derived>
template <typename TNumerator, typename TDenominator, typename Derived>
class AggregateFunctionAvgBase : public
IAggregateFunctionDataHelper<AvgFraction<Numerator, Denominator>, Derived>
IAggregateFunctionDataHelper<AvgFraction<TNumerator, TDenominator>, Derived>
{
public:
using Base = IAggregateFunctionDataHelper<AvgFraction<TNumerator, TDenominator>, Derived>;
using Numerator = TNumerator;
using Denominator = TDenominator;
using Fraction = AvgFraction<Numerator, Denominator>;
using Base = IAggregateFunctionDataHelper<Fraction, Derived>;
explicit AggregateFunctionAvgBase(const DataTypes & argument_types_,
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
@ -135,6 +145,77 @@ public:
else
assert_cast<ColumnVector<Float64> &>(to).getData().push_back(this->data(place).divide());
}
#if USE_EMBEDDED_COMPILER

/// JIT support for avg-family aggregates over the AvgFraction state.
/// Compilable only when every argument type and the return type can be
/// represented as an LLVM native type.
bool isCompilable() const override
{
bool can_be_compiled = true;
for (const auto & argument : this->argument_types)
can_be_compiled &= canBeNativeType(*argument);
auto return_type = getReturnType();
can_be_compiled &= canBeNativeType(*return_type);
return can_be_compiled;
}

/// Zero-initialize the whole aggregate state (numerator = 0, denominator = 0).
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(Fraction), llvm::assumeAligned(this->alignOfData()));
}

/// Emit IR merging the src state into dst: numerators and denominators are
/// added field-wise. Integer vs floating-point add is chosen per field from
/// its native type.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
/// The numerator is the first field of Fraction, so the state pointer can be
/// cast to it directly.
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, numerator_type->getPointerTo());
auto * numerator_dst_value = b.CreateLoad(numerator_type, numerator_dst_ptr);
auto * numerator_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, numerator_type->getPointerTo());
auto * numerator_src_value = b.CreateLoad(numerator_type, numerator_src_ptr);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_dst_value, numerator_src_value) : b.CreateFAdd(numerator_dst_value, numerator_src_value);
b.CreateStore(numerator_result_value, numerator_dst_ptr);
/// The denominator is addressed through its byte offset inside Fraction.
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_dst_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_dst_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_src_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_src_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_dst_value = b.CreateLoad(denominator_type, denominator_dst_ptr);
auto * denominator_src_value = b.CreateLoad(denominator_type, denominator_src_ptr);
auto * denominator_result_value = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_src_value, denominator_dst_value) : b.CreateFAdd(denominator_src_value, denominator_dst_value);
b.CreateStore(denominator_result_value, denominator_dst_ptr);
}

/// Emit IR producing the final result: numerator / denominator, with both
/// operands cast to double before the division.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * double_numerator = nativeCast<Numerator>(b, numerator_value, b.getDoubleTy());
auto * double_denominator = nativeCast<Denominator>(b, denominator_value, b.getDoubleTy());
return b.CreateFDiv(double_numerator, double_denominator);
}

#endif
private:
UInt32 num_scale;
UInt32 denom_scale;
@ -149,7 +230,12 @@ template <typename T>
class AggregateFunctionAvg final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>
{
public:
using AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>::AggregateFunctionAvgBase;
using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionAvg<T>>;
using Base::Base;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
@ -158,5 +244,29 @@ public:
}
String getName() const final { return "avg"; }
#if USE_EMBEDDED_COMPILER

/// Emit IR for adding one row to avg: numerator += cast(value), denominator += 1.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
/// The incoming value is cast to the numerator's native type before the add.
auto * value_cast_to_numerator = nativeCast(b, arguments_types[0], argument_values[0], numerator_type);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_cast_to_numerator) : b.CreateFAdd(numerator_value, value_cast_to_numerator);
b.CreateStore(numerator_result_value, numerator_ptr);
/// The denominator is a row counter: always an integer increment by 1.
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value_updated = b.CreateAdd(b.CreateLoad(denominator_type, denominator_ptr), llvm::ConstantInt::get(denominator_type, 1));
b.CreateStore(denominator_value_updated, denominator_ptr);
}

#endif
};
}

View File

@ -28,19 +28,64 @@ public:
MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>>;
using Base::Base;
using ValueT = MaxFieldType<Value, Weight>;
using Numerator = typename Base::Numerator;
using Denominator = typename Base::Denominator;
using Fraction = typename Base::Fraction;
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<ValueT>(
this->data(place).numerator += static_cast<Numerator>(
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<ValueT>(weights.getData()[row_num]);
static_cast<Numerator>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<AvgWeightedFieldType<Weight>>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);
}
String getName() const override { return "avgWeighted"; }
#if USE_EMBEDDED_COMPILER

/// Compilable only when the base avg state is compilable and the Weight type
/// also has a native LLVM representation.
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
}

/// Emit IR for one row of avgWeighted:
/// numerator += value * weight; denominator += weight.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * numerator_type = toNativeType<Numerator>(b);
auto * numerator_ptr = b.CreatePointerCast(aggregate_data_ptr, numerator_type->getPointerTo());
auto * numerator_value = b.CreateLoad(numerator_type, numerator_ptr);
/// Both the value and the weight are cast to the numerator type before the
/// multiplication, mirroring the interpreted add() above.
auto * argument = nativeCast(b, arguments_types[0], argument_values[0], numerator_type);
auto * weight = nativeCast(b, arguments_types[1], argument_values[1], numerator_type);
llvm::Value * value_weight_multiplication = argument->getType()->isIntegerTy() ? b.CreateMul(argument, weight) : b.CreateFMul(argument, weight);
auto * numerator_result_value = numerator_type->isIntegerTy() ? b.CreateAdd(numerator_value, value_weight_multiplication) : b.CreateFAdd(numerator_value, value_weight_multiplication);
b.CreateStore(numerator_result_value, numerator_ptr);
/// The denominator accumulates the weight, cast to its own native type.
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_offset_ptr = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, denominator_offset);
auto * denominator_ptr = b.CreatePointerCast(denominator_offset_ptr, denominator_type->getPointerTo());
auto * weight_cast_to_denominator = nativeCast(b, arguments_types[1], argument_values[1], denominator_type);
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * denominator_value_updated = denominator_type->isIntegerTy() ? b.CreateAdd(denominator_value, weight_cast_to_denominator) : b.CreateFAdd(denominator_value, weight_cast_to_denominator);
b.CreateStore(denominator_value_updated, denominator_ptr);
}

#endif
};
}

View File

@ -10,6 +10,15 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -107,6 +116,66 @@ public:
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr &, const DataTypes & types, const Array & params, const AggregateFunctionProperties & /*properties*/) const override;
#if USE_EMBEDDED_COMPILER

/// count() is compilable when every argument type has a native representation.
bool isCompilable() const override
{
bool is_compilable = true;
for (const auto & argument_type : argument_types)
is_compilable &= canBeNativeType(*argument_type);
return is_compilable;
}

/// Zero-initialize the counter state.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(AggregateFunctionCountData), llvm::assumeAligned(this->alignOfData()));
}

/// Emit IR incrementing the counter by 1 for each row; argument values are
/// ignored — plain count() counts rows unconditionally.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> &) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * count_value = b.CreateLoad(return_type, count_value_ptr);
auto * updated_count_value = b.CreateAdd(count_value, llvm::ConstantInt::get(return_type, 1));
b.CreateStore(updated_count_value, count_value_ptr);
}

/// Emit IR adding the src counter into the dst counter.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr);
auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr);
auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src);
b.CreateStore(count_value_dst_updated, count_value_dst_ptr);
}

/// Emit IR loading the final counter value as the result.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, count_value_ptr);
}

#endif
};
@ -155,6 +224,71 @@ public:
{
assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
}
#if USE_EMBEDDED_COMPILER

/// count(x) over a nullable column is compilable when every argument type has
/// a native representation.
bool isCompilable() const override
{
bool is_compilable = true;
for (const auto & argument_type : argument_types)
is_compilable &= canBeNativeType(*argument_type);
return is_compilable;
}

/// Zero-initialize the counter state.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(AggregateFunctionCountData), llvm::assumeAligned(this->alignOfData()));
}

/// Emit IR incrementing the counter only for non-NULL rows. The nullable
/// argument is an aggregate value {wrapped_value, is_null}; a branch-free
/// select turns the null flag into an increment of 0 or 1.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> & values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * is_null_value = b.CreateExtractValue(values[0], {1});
auto * increment_value = b.CreateSelect(is_null_value, llvm::ConstantInt::get(return_type, 0), llvm::ConstantInt::get(return_type, 1));
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * count_value = b.CreateLoad(return_type, count_value_ptr);
auto * updated_count_value = b.CreateAdd(count_value, increment_value);
b.CreateStore(updated_count_value, count_value_ptr);
}

/// Emit IR adding the src counter into the dst counter.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * count_value_dst = b.CreateLoad(return_type, count_value_dst_ptr);
auto * count_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * count_value_src = b.CreateLoad(return_type, count_value_src_ptr);
auto * count_value_dst_updated = b.CreateAdd(count_value_dst, count_value_src);
b.CreateStore(count_value_dst_updated, count_value_dst_ptr);
}

/// Emit IR loading the final counter value as the result.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * count_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, count_value_ptr);
}

#endif
};
}

View File

@ -106,6 +106,48 @@ public:
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
#if USE_EMBEDDED_COMPILER

/// Emit IR for one row of a -If combinator over a single nullable argument.
/// The nested add runs only when the value is not NULL AND the trailing
/// predicate (last argument) is true; otherwise control falls through to the
/// join block with no state change.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
/// Nullable values are passed as a pair {wrapped_value, is_null}.
const auto & nullable_type = arguments_types[0];
const auto & nullable_value = argument_values[0];
auto * wrapped_value = b.CreateExtractValue(nullable_value, {0});
auto * is_null_value = b.CreateExtractValue(nullable_value, {1});
/// The -If predicate is always the last argument.
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
b.CreateCondBr(b.CreateAnd(b.CreateNot(is_null_value), is_predicate_true), if_not_null, if_null);
b.SetInsertPoint(if_null);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
/// For a nullable result the first state byte is a "value seen" flag that
/// must be raised before delegating to the nested function's state.
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}

#endif
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
@ -168,6 +210,95 @@ public:
}
}
#if USE_EMBEDDED_COMPILER

/// Emit IR for one row of a -If combinator over several possibly-nullable
/// arguments. Unwraps each nullable argument, optionally skips the row when
/// any unwrapped value is NULL (null_is_skipped), then guards the nested add
/// with the trailing predicate argument.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
/// TODO: Check that the generated IR matches the interpreted add() for all
/// null/predicate combinations.
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
size_t arguments_size = arguments_types.size();
/// Per-argument unwrapping: nullable arguments are pairs {value, is_null};
/// non-nullable ones pass through unchanged (their is_null slot stays null).
DataTypes non_nullable_types;
std::vector<llvm::Value * > wrapped_values;
std::vector<llvm::Value * > is_null_values;
non_nullable_types.resize(arguments_size);
wrapped_values.resize(arguments_size);
is_null_values.resize(arguments_size);
for (size_t i = 0; i < arguments_size; ++i)
{
const auto & argument_value = argument_values[i];
if (is_nullable[i])
{
auto * wrapped_value = b.CreateExtractValue(argument_value, {0});
if constexpr (null_is_skipped)
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
wrapped_values[i] = wrapped_value;
non_nullable_types[i] = removeNullable(arguments_types[i]);
}
else
{
wrapped_values[i] = argument_value;
non_nullable_types[i] = arguments_types[i];
}
}
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * join_block_after_null_checks = llvm::BasicBlock::Create(head->getContext(), "join_block_after_null_checks", head->getParent());
if constexpr (null_is_skipped)
{
/// OR together all null flags through a stack slot; if any argument is
/// NULL the row is skipped entirely (jump straight to the join block).
auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
b.CreateStore(b.getInt1(false), values_have_null_ptr);
for (auto * is_null_value : is_null_values)
{
if (!is_null_value)
continue;
auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
}
b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), join_block, join_block_after_null_checks);
}
b.SetInsertPoint(join_block_after_null_checks);
/// The -If predicate is always the last argument.
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
auto * if_true = llvm::BasicBlock::Create(head->getContext(), "if_true", head->getParent());
auto * if_false = llvm::BasicBlock::Create(head->getContext(), "if_false", head->getParent());
b.CreateCondBr(is_predicate_true, if_true, if_false);
b.SetInsertPoint(if_false);
b.CreateBr(join_block);
b.SetInsertPoint(if_true);
/// For a nullable result the first state byte is a "value seen" flag.
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}

#endif
private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;

View File

@ -5,6 +5,14 @@
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -154,6 +162,76 @@ public:
const Array & params, const AggregateFunctionProperties & properties) const override;
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
#if USE_EMBEDDED_COMPILER

/// The -If combinator is compilable exactly when the wrapped function is.
bool isCompilable() const override
{
return nested_func->isCompilable();
}

/// State creation is delegated unchanged to the nested function.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
nested_func->compileCreate(builder, aggregate_data_ptr);
}

/// Emit IR that evaluates the trailing predicate argument and, only when it
/// is true, runs the nested function's add over the remaining arguments.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
/// The -If predicate is always the last argument.
const auto & predicate_type = arguments_types[argument_values.size() - 1];
auto * predicate_value = argument_values[argument_values.size() - 1];
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_true = llvm::BasicBlock::Create(head->getContext(), "if_true", head->getParent());
auto * if_false = llvm::BasicBlock::Create(head->getContext(), "if_false", head->getParent());
auto * is_predicate_true = nativeBoolCast(b, predicate_type, predicate_value);
b.CreateCondBr(is_predicate_true, if_true, if_false);
b.SetInsertPoint(if_true);
/// Forward all arguments except the predicate to the nested function.
size_t arguments_size_without_predicate = arguments_types.size() - 1;
DataTypes argument_types_without_predicate;
std::vector<llvm::Value *> argument_values_without_predicate;
argument_types_without_predicate.resize(arguments_size_without_predicate);
argument_values_without_predicate.resize(arguments_size_without_predicate);
for (size_t i = 0; i < arguments_size_without_predicate; ++i)
{
argument_types_without_predicate[i] = arguments_types[i];
argument_values_without_predicate[i] = argument_values[i];
}
nested_func->compileAdd(builder, aggregate_data_ptr, argument_types_without_predicate, argument_values_without_predicate);
b.CreateBr(join_block);
b.SetInsertPoint(if_false);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}

/// Merge is delegated unchanged to the nested function.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
nested_func->compileMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}

/// Result extraction is delegated unchanged to the nested function.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
return nested_func->compileGetResult(builder, aggregate_data_ptr);
}

#endif
};
}

View File

@ -7,11 +7,20 @@
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <common/StringRef.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -20,6 +29,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
/** Aggregate functions that store one of passed values.
@ -177,6 +187,265 @@ public:
{
return false;
}
#if USE_EMBEDDED_COMPILER

/// Marks this single-value state as supporting JIT compilation.
static constexpr bool is_compilable = true;

/// Returns a typed pointer to the `value` member inside the state, addressed
/// through its byte offset within SingleValueDataFixed<T>.
static llvm::Value * getValuePtrFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
static constexpr size_t value_offset_from_structure = offsetof(SingleValueDataFixed<T>, value);
auto * type = toNativeType<T>(builder);
auto * value_ptr_with_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, value_offset_from_structure);
auto * value_ptr = b.CreatePointerCast(value_ptr_with_offset, type->getPointerTo());
return value_ptr;
}

/// Loads the stored value from the state.
static llvm::Value * getValueFromAggregateDataPtr(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * type = toNativeType<T>(builder);
auto * value_ptr = getValuePtrFromAggregateDataPtr(builder, aggregate_data_ptr);
return b.CreateLoad(type, value_ptr);
}

/// Unconditionally records a new value: raises the has_value flag (stored at
/// the start of the state) and stores the value.
static void compileChange(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
b.CreateStore(b.getInt1(true), has_value_ptr);
auto * value_ptr = getValuePtrFromAggregateDataPtr(b, aggregate_data_ptr);
b.CreateStore(value_to_check, value_ptr);
}

/// Copies the src state's value into dst (flag + value).
static void compileChangeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
auto * value_src = getValueFromAggregateDataPtr(builder, aggregate_data_src_ptr);
compileChange(builder, aggregate_data_dst_ptr, value_src);
}

/// Records the value only if no value has been stored yet (any/first
/// semantics): branches on the has_value flag.
static void compileChangeFirstTime(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(has_value_value, if_should_not_change, if_should_change);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_change);
compileChange(builder, aggregate_data_ptr, value_to_check);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}

/// Merge for first-time semantics: dst takes src's value only when dst has
/// no value yet and src has one.
static void compileChangeFirstTimeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(b.CreateAnd(b.CreateNot(has_value_dst), has_value_src), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}

/// Records the value unconditionally (anyLast semantics).
static void compileChangeEveryTime(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
compileChange(builder, aggregate_data_ptr, value_to_check);
}

/// Merge for every-time semantics: dst takes src's value whenever src has one.
static void compileChangeEveryTimeMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
b.CreateCondBr(has_value_src, if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
/// Emit IR for min/max update: overwrite the stored value when the incoming
/// value compares strictly less (is_less) or strictly greater (!is_less),
/// or when no value has been stored yet.
template <bool is_less>
static void compileChangeComparison(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * has_value_ptr = b.CreatePointerCast(aggregate_data_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_value = b.CreateLoad(b.getInt1Ty(), has_value_ptr);
auto * value = getValueFromAggregateDataPtr(b, aggregate_data_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
/// T is the underlying value type of the aggregate state; its signedness
/// selects the signed vs unsigned integer comparison predicate.
auto is_signed = std::numeric_limits<T>::is_signed;
llvm::Value * should_change_after_comparison = nullptr;
if constexpr (is_less)
{
if (value_to_check->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSLT(value_to_check, value) : b.CreateICmpULT(value_to_check, value);
else
/// FCmpOLT is an ordered comparison: yields false if either operand is NaN.
should_change_after_comparison = b.CreateFCmpOLT(value_to_check, value);
}
else
{
if (value_to_check->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSGT(value_to_check, value) : b.CreateICmpUGT(value_to_check, value);
else
should_change_after_comparison = b.CreateFCmpOGT(value_to_check, value);
}
/// Change iff (!has_value || comparison holds): the first value is always taken.
b.CreateCondBr(b.CreateOr(b.CreateNot(has_value_value), should_change_after_comparison), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChange(builder, aggregate_data_ptr, value_to_check);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
/// Emit IR merging two min/max states: dst takes src's value when src has a
/// value and it is strictly better (less/greater) than dst's, or dst is empty.
template <bool is_less>
static void compileChangeComparisonMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
/// Load both states' has_value flags (i1 at state start) and payload values.
auto * has_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_dst = b.CreateLoad(b.getInt1Ty(), has_value_dst_ptr);
auto * value_dst = getValueFromAggregateDataPtr(b, aggregate_data_dst_ptr);
auto * has_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, b.getInt1Ty()->getPointerTo());
auto * has_value_src = b.CreateLoad(b.getInt1Ty(), has_value_src_ptr);
auto * value_src = getValueFromAggregateDataPtr(b, aggregate_data_src_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_should_change = llvm::BasicBlock::Create(head->getContext(), "if_should_change", head->getParent());
auto * if_should_not_change = llvm::BasicBlock::Create(head->getContext(), "if_should_not_change", head->getParent());
/// T's signedness selects the integer comparison predicate.
auto is_signed = std::numeric_limits<T>::is_signed;
llvm::Value * should_change_after_comparison = nullptr;
if constexpr (is_less)
{
if (value_src->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSLT(value_src, value_dst) : b.CreateICmpULT(value_src, value_dst);
else
/// Ordered float comparison: false if either side is NaN.
should_change_after_comparison = b.CreateFCmpOLT(value_src, value_dst);
}
else
{
if (value_src->getType()->isIntegerTy())
should_change_after_comparison = is_signed ? b.CreateICmpSGT(value_src, value_dst) : b.CreateICmpUGT(value_src, value_dst);
else
should_change_after_comparison = b.CreateFCmpOGT(value_src, value_dst);
}
/// Change iff has_value_src && (!has_value_dst || comparison holds).
b.CreateCondBr(b.CreateAnd(has_value_src, b.CreateOr(b.CreateNot(has_value_dst), should_change_after_comparison)), if_should_change, if_should_not_change);
b.SetInsertPoint(if_should_change);
compileChangeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_should_not_change);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
/// "min" update: store the new value when it is strictly less (or state is empty).
static void compileChangeIfLess(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
    compileChangeComparison<true>(builder, aggregate_data_ptr, value_to_check);
}
/// "min" merge: dst takes src's value when it is strictly less (or dst is empty).
static void compileChangeIfLessMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
    compileChangeComparisonMerge<true>(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
/// "max" update: store the new value when it is strictly greater (or state is empty).
static void compileChangeIfGreater(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
    compileChangeComparison<false>(builder, aggregate_data_ptr, value_to_check);
}
/// "max" merge: dst takes src's value when it is strictly greater (or dst is empty).
static void compileChangeIfGreaterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
    compileChangeComparisonMerge<false>(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
/// Emit IR extracting the result: just the stored payload value
/// (the has_value flag is not part of the result).
static llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr)
{
return getValueFromAggregateDataPtr(builder, aggregate_data_ptr);
}
#endif
};
@ -400,6 +669,13 @@ public:
{
return true;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
static_assert(
@ -576,6 +852,13 @@ public:
{
return false;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
@ -593,6 +876,22 @@ struct AggregateFunctionMinData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfLess(to, arena); }
static const char * name() { return "min"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
/// For "min", "better" means strictly less than the stored value.
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeIfLess(builder, aggregate_data_ptr, value_to_check);
}
/// Merge counterpart for "min": take the source state's value when smaller.
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeIfLessMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -604,6 +903,22 @@ struct AggregateFunctionMaxData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfGreater(to, arena); }
static const char * name() { return "max"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
/// For "max", "better" means strictly greater than the stored value.
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeIfGreater(builder, aggregate_data_ptr, value_to_check);
}
/// Merge counterpart for "max": take the source state's value when greater.
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeIfGreaterMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -615,6 +930,22 @@ struct AggregateFunctionAnyData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeFirstTime(to, arena); }
static const char * name() { return "any"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
/// For "any", only the first observed value is kept.
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeFirstTime(builder, aggregate_data_ptr, value_to_check);
}
/// Merge counterpart for "any": adopt the source value only if dst has none yet.
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeFirstTimeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
template <typename Data>
@ -626,6 +957,22 @@ struct AggregateFunctionAnyLastData : Data
bool changeIfBetter(const Self & to, Arena * arena) { return this->changeEveryTime(to, arena); }
static const char * name() { return "anyLast"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = Data::is_compilable;
/// For "anyLast", every new value overwrites the stored one.
static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check)
{
Data::compileChangeEveryTime(builder, aggregate_data_ptr, value_to_check);
}
/// Merge counterpart for "anyLast": take the source value whenever it exists.
static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr)
{
Data::compileChangeEveryTimeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
#endif
};
@ -693,6 +1040,13 @@ struct AggregateFunctionAnyHeavyData : Data
}
static const char * name() { return "anyHeavy"; }
#if USE_EMBEDDED_COMPILER
static constexpr bool is_compilable = false;
#endif
};
@ -752,6 +1106,62 @@ public:
{
this->data(place).insertResultInto(to);
}
#if USE_EMBEDDED_COMPILER
/// JIT compilation is possible only when the Data policy supports it and the
/// single argument type maps onto a native (LLVM-representable) type.
bool isCompilable() const override
{
if constexpr (!Data::is_compilable)
return false;
return canBeNativeType(*this->argument_types[0]);
}
/// Zero-initialize the whole aggregate state (value + has_value flag) in place.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->sizeOfData(), llvm::assumeAligned(this->alignOfData()));
}
/// Emit IR applying one input row via the Data policy's "change if better" rule.
/// The else-branch throw is unreachable at runtime when isCompilable() is honored.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes &, const std::vector<llvm::Value *> & argument_values) const override
{
if constexpr (Data::is_compilable)
{
Data::compileChangeIfBetter(builder, aggregate_data_ptr, argument_values[0]);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
/// Emit IR merging another aggregate state into dst via the Data policy.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (Data::is_compilable)
{
Data::compileChangeIfBetterMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
/// Emit IR extracting the final result value from the aggregate state.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (Data::is_compilable)
{
return Data::compileGetResult(builder, aggregate_data_ptr);
}
else
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
}
#endif
};
}

View File

@ -6,9 +6,18 @@
#include <Common/assert_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -183,6 +192,93 @@ public:
}
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
#if USE_EMBEDDED_COMPILER
/// The Null combinator is compilable exactly when its wrapped function is.
bool isCompilable() const override
{
return this->nested_function->isCompilable();
}
/// Initialize the combinator state: zero the null-flag prefix (when the result
/// is nullable), then let the nested function initialize its state after it.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
if constexpr (result_is_nullable)
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->prefix_size, llvm::assumeAligned(this->alignOfData()));
/// The nested function's state lives prefix_size bytes past ours.
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileCreate(b, aggregate_data_ptr_with_prefix_size_offset);
}
/// Merge two combinator states: OR the "has non-null data" flags (stored in the
/// prefix byte), then merge the nested states behind the prefixes.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
if constexpr (result_is_nullable)
{
auto * aggregate_data_is_null_dst_value = b.CreateLoad(aggregate_data_dst_ptr);
auto * aggregate_data_is_null_src_value = b.CreateLoad(aggregate_data_src_ptr);
auto * is_src_null = nativeBoolCast(b, std::make_shared<DataTypeUInt8>(), aggregate_data_is_null_src_value);
/// dst flag becomes 1 if src's flag is set, otherwise keeps its own value.
auto * is_null_result_value = b.CreateSelect(is_src_null, llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_is_null_dst_value);
b.CreateStore(is_null_result_value, aggregate_data_dst_ptr);
}
auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_dst_ptr, this->prefix_size);
auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_src_ptr, this->prefix_size);
this->nested_function->compileMerge(b, aggregate_data_dst_ptr_with_prefix_size_offset, aggregate_data_src_ptr_with_prefix_size_offset);
}
/// Produce the (possibly Nullable) result. For a nullable result the native
/// representation is a {value, is_null} pair built with insertvalue; the
/// nested function's result is fetched only on the not-null path.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, this->getReturnType());
llvm::Value * result = nullptr;
if constexpr (result_is_nullable)
{
/// Prefix byte: non-zero means at least one non-null row was aggregated.
auto * place = b.CreateLoad(b.getInt8Ty(), aggregate_data_ptr);
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
/// Stack slot holding the result pair, zero-initialized up front.
auto * nullable_value_ptr = b.CreateAlloca(return_type);
b.CreateStore(llvm::ConstantInt::getNullValue(return_type), nullable_value_ptr);
auto * nullable_value = b.CreateLoad(return_type, nullable_value_ptr);
b.CreateCondBr(nativeBoolCast(b, std::make_shared<DataTypeUInt8>(), place), if_not_null, if_null);
b.SetInsertPoint(if_null);
/// Null result: set the is_null field (index 1) of the pair.
b.CreateStore(b.CreateInsertValue(nullable_value, b.getInt1(true), {1}), nullable_value_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
/// Not-null result: ask the nested function and store into the value field (index 0).
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * nested_result = this->nested_function->compileGetResult(builder, aggregate_data_ptr_with_prefix_size_offset);
b.CreateStore(b.CreateInsertValue(nullable_value, nested_result, {0}), nullable_value_ptr);
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
result = b.CreateLoad(return_type, nullable_value_ptr);
}
else
{
result = this->nested_function->compileGetResult(b, aggregate_data_ptr);
}
return result;
}
#endif
};
@ -226,6 +322,44 @@ public:
if (!memoryIsByte(null_map, batch_size, 1))
this->setFlag(place);
}
#if USE_EMBEDDED_COMPILER
/// Unary Nullable adapter: unwrap the {value, is_null} argument, skip the row
/// when it is NULL, otherwise mark the prefix flag and forward the plain value
/// (with the Nullable stripped from its type) to the nested function.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
const auto & nullable_type = arguments_types[0];
const auto & nullable_value = argument_values[0];
/// Native Nullable values are a pair: {0: payload, 1: is_null flag}.
auto * wrapped_value = b.CreateExtractValue(nullable_value, {0});
auto * is_null_value = b.CreateExtractValue(nullable_value, {1});
auto * head = b.GetInsertBlock();
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
b.CreateCondBr(is_null_value, if_null, if_not_null);
/// NULL input: contribute nothing.
b.SetInsertPoint(if_null);
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
b.SetInsertPoint(join_block);
}
#endif
};
@ -277,6 +411,90 @@ public:
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
}
#if USE_EMBEDDED_COMPILER
/// Variadic Nullable adapter: unwrap every Nullable argument into its plain
/// value (remembering the per-argument null flags), optionally skip the row if
/// any nullable argument is NULL, then forward the unwrapped values to the
/// nested function.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
    llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);

    size_t arguments_size = arguments_types.size();

    DataTypes non_nullable_types;
    std::vector<llvm::Value *> wrapped_values;
    std::vector<llvm::Value *> is_null_values;

    non_nullable_types.resize(arguments_size);
    wrapped_values.resize(arguments_size);
    is_null_values.resize(arguments_size);

    for (size_t i = 0; i < arguments_size; ++i)
    {
        const auto & argument_value = argument_values[i];

        if (is_nullable[i])
        {
            /// Native Nullable values are a pair: {0: payload, 1: is_null flag}.
            auto * wrapped_value = b.CreateExtractValue(argument_value, {0});

            if constexpr (null_is_skipped)
                is_null_values[i] = b.CreateExtractValue(argument_value, {1});

            wrapped_values[i] = wrapped_value;
            non_nullable_types[i] = removeNullable(arguments_types[i]);
        }
        else
        {
            wrapped_values[i] = argument_value;
            non_nullable_types[i] = arguments_types[i];
        }
    }

    if constexpr (null_is_skipped)
    {
        /// Skip the whole row if any nullable argument is NULL.
        auto * head = b.GetInsertBlock();

        auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
        auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
        auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());

        /// OR all collected null flags together through a stack slot.
        auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
        b.CreateStore(b.getInt1(false), values_have_null_ptr);

        for (auto * is_null_value : is_null_values)
        {
            if (!is_null_value)
                continue;

            auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
            b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
        }

        b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), if_null, if_not_null);

        b.SetInsertPoint(if_null);
        b.CreateBr(join_block);

        b.SetInsertPoint(if_not_null);

        if constexpr (result_is_nullable)
            b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);

        auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
        /// Fix: pass the unwrapped (non-Nullable) types to match wrapped_values.
        /// The previous code passed arguments_types here, which was inconsistent
        /// with the unwrapped values and with the non-skipping branch below.
        this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
        b.CreateBr(join_block);

        b.SetInsertPoint(join_block);
    }
    else
    {
        b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
        auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
        this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
    }
}
#endif
private:
enum { MAX_ARGS = 8 };
size_t number_of_arguments = 0;

View File

@ -12,6 +12,14 @@
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
# include <DataTypes/Native.h>
#endif
namespace DB
{
@ -385,6 +393,80 @@ public:
column.getData().push_back(this->data(place).get());
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override
{
if constexpr (Type == AggregateFunctionTypeSumKahan)
return false;
bool can_be_compiled = true;
for (const auto & argument_type : this->argument_types)
can_be_compiled &= canBeNativeType(*argument_type);
auto return_type = getReturnType();
can_be_compiled &= canBeNativeType(*return_type);
return can_be_compiled;
}
/// Initialize the sum state to zero, stored directly as the native return type.
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * aggregate_sum_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
b.CreateStore(llvm::Constant::getNullValue(return_type), aggregate_sum_ptr);
}
/// Add one input value: widen/cast it to the result type, then integer or
/// floating-point add depending on the native result type.
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
auto * sum_value = b.CreateLoad(return_type, sum_value_ptr);
const auto & argument_type = arguments_types[0];
const auto & argument_value = argument_values[0];
/// nativeCast handles the argument-type -> result-type conversion (e.g. widening).
auto * value_cast_to_result = nativeCast(b, argument_type, argument_value, return_type);
auto * sum_result_value = sum_value->getType()->isIntegerTy() ? b.CreateAdd(sum_value, value_cast_to_result) : b.CreateFAdd(sum_value, value_cast_to_result);
b.CreateStore(sum_result_value, sum_value_ptr);
}
/// Merge two sum states: dst += src, using the add instruction matching the type.
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_dst_ptr = b.CreatePointerCast(aggregate_data_dst_ptr, return_type->getPointerTo());
auto * sum_value_dst = b.CreateLoad(return_type, sum_value_dst_ptr);
auto * sum_value_src_ptr = b.CreatePointerCast(aggregate_data_src_ptr, return_type->getPointerTo());
auto * sum_value_src = b.CreateLoad(return_type, sum_value_src_ptr);
auto * sum_return_value = sum_value_dst->getType()->isIntegerTy() ? b.CreateAdd(sum_value_dst, sum_value_src) : b.CreateFAdd(sum_value_dst, sum_value_src);
b.CreateStore(sum_return_value, sum_value_dst_ptr);
}
/// The result is the accumulated sum itself: load it as the native return type.
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * return_type = toNativeType(b, getReturnType());
auto * sum_value_ptr = b.CreatePointerCast(aggregate_data_ptr, return_type->getPointerTo());
return b.CreateLoad(return_type, sum_value_ptr);
}
#endif
private:
UInt32 scale;
};

View File

@ -48,6 +48,15 @@ public:
String getName() const final { return "sumCount"; }
#if USE_EMBEDDED_COMPILER
/// sumCount is explicitly excluded from JIT compilation.
bool isCompilable() const override
{
return false;
}
#endif
private:
UInt32 scale;
};

View File

@ -10,4 +10,44 @@ DataTypePtr IAggregateFunction::getStateType() const
return std::make_shared<DataTypeAggregateFunction>(shared_from_this(), argument_types, parameters);
}
/// Render the function as name(parameters)(argument_types),
/// e.g. "quantile(0.5)(UInt64)". Both parenthesized lists are always present,
/// even when empty.
String IAggregateFunction::getDescription() const
{
    String description = getName();

    description += '(';
    bool is_first = true;
    for (const auto & parameter : parameters)
    {
        if (!is_first)
            description += ", ";
        is_first = false;
        description += parameter.dump();
    }
    description += ')';

    description += '(';
    is_first = true;
    for (const auto & argument_type : argument_types)
    {
        if (!is_first)
            description += ", ";
        is_first = false;
        description += argument_type->getName();
    }
    description += ')';

    return description;
}
}

View File

@ -9,11 +9,21 @@
#include <Common/Exception.h>
#include <common/types.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include <cstddef>
#include <memory>
#include <vector>
#include <type_traits>
namespace llvm
{
class LLVMContext;
class Value;
class IRBuilderBase;
}
namespace DB
{
@ -208,6 +218,26 @@ public:
const IColumn ** columns,
Arena * arena) const = 0;
/** Insert result of aggregate function into result column with batch size.
* If destroy_place_after_insert is true. Then implementation of this method
* must destroy aggregate place if insert state into result column was successful.
* All places that were not inserted must be destroyed if there was exception during insert into result column.
*/
virtual void insertResultIntoBatch(
size_t batch_size,
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const = 0;
/** Destroy batch of aggregate places.
*/
virtual void destroyBatch(
size_t batch_size,
AggregateDataPtr * places,
size_t place_offset) const noexcept = 0;
/** By default all NULLs are skipped during aggregation.
* If it returns nullptr, the default one will be used.
* If an aggregate function wants to use something instead of the default one, it overrides this function and returns its own null adapter.
@ -241,6 +271,40 @@ public:
// of true window functions, so this hack-ish interface suffices.
virtual bool isOnlyWindowFunction() const { return false; }
/// Description of AggregateFunction in form of name(parameters)(argument_types).
String getDescription() const;
#if USE_EMBEDDED_COMPILER
/// Is function JIT compilable
virtual bool isCompilable() const { return false; }
/// compileCreate should generate code for initialization of aggregate function state in aggregate_data_ptr
virtual void compileCreate(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileAdd should generate code for updating aggregate function state stored in aggregate_data_ptr
virtual void compileAdd(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/, const DataTypes & /*arguments_types*/, const std::vector<llvm::Value *> & /*arguments_values*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileMerge should generate code for merging aggregate function states stored in aggregate_data_dst_ptr and aggregate_data_src_ptr
virtual void compileMerge(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_dst_ptr*/, llvm::Value * /*aggregate_data_src_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
/// compileGetResult should generate code for getting result value from aggregate function state stored in aggregate_data_ptr
virtual llvm::Value * compileGetResult(llvm::IRBuilderBase & /*builder*/, llvm::Value * /*aggregate_data_ptr*/) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
}
#endif
protected:
DataTypes argument_types;
Array parameters;
@ -415,6 +479,37 @@ public:
static_cast<const Derived *>(this)->add(place + place_offset, columns, i, arena);
}
}
/// Insert the result of each place in the batch into `to`, optionally destroying
/// each place right after a successful insert. Contract (see interface comment):
/// if an insert throws, every place not yet inserted — including the one that
/// failed — is destroyed before rethrowing, so no state leaks.
void insertResultIntoBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset, IColumn & to, Arena * arena, bool destroy_place_after_insert) const override
{
size_t batch_index = 0;
try
{
for (; batch_index < batch_size; ++batch_index)
{
static_cast<const Derived *>(this)->insertResultInto(places[batch_index] + place_offset, to, arena);
/// Destroy only after the insert succeeded for this place.
if (destroy_place_after_insert)
static_cast<const Derived *>(this)->destroy(places[batch_index] + place_offset);
}
}
catch (...)
{
/// batch_index points at the failing place; destroy it and the rest.
for (size_t destroy_index = batch_index; destroy_index < batch_size; ++destroy_index)
static_cast<const Derived *>(this)->destroy(places[destroy_index] + place_offset);
throw;
}
}
/// Destroy every aggregate state in the batch; noexcept because destruction
/// must not throw during cleanup paths.
void destroyBatch(size_t batch_size, AggregateDataPtr * places, size_t place_offset) const noexcept override
{
    const auto * derived = static_cast<const Derived *>(this);
    for (size_t index = 0; index < batch_size; ++index)
        derived->destroy(places[index] + place_offset);
}
};

View File

@ -85,6 +85,7 @@ if (USE_AMQPCPP)
endif()
if (USE_LIBPQXX)
add_headers_and_sources(dbms Core/PostgreSQL)
add_headers_and_sources(dbms Databases/PostgreSQL)
add_headers_and_sources(dbms Storages/PostgreSQL)
endif()

View File

@ -7,6 +7,11 @@
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#undef __msan_unpoison
#undef __msan_test_shadow
#undef __msan_print_shadow
#undef __msan_unpoison_string
#define __msan_unpoison(X, Y)
#define __msan_test_shadow(X, Y) (false)
#define __msan_print_shadow(X, Y)

View File

@ -0,0 +1,74 @@
#include "Connection.h"
#include <common/logger_useful.h>
namespace postgres
{
/// connection_info is a pair of {libpq connection string, human-readable description}.
/// In replication mode the connection string gets "replication=database" appended,
/// which switches the PostgreSQL session into logical replication mode.
Connection::Connection(const ConnectionInfo & connection_info_, bool replication_, size_t num_tries_)
: connection_info(connection_info_), replication(replication_), num_tries(num_tries_)
, log(&Poco::Logger::get("PostgreSQLReplicaConnection"))
{
if (replication)
{
connection_info = std::make_pair(
fmt::format("{} replication=database", connection_info.first), connection_info.second);
}
}
/// Run `exec` inside a nontransaction, retrying up to num_tries times on
/// pqxx::broken_connection. Rethrows the last broken_connection when all
/// attempts are exhausted; any other exception propagates immediately.
void Connection::execWithRetry(const std::function<void(pqxx::nontransaction &)> & exec)
{
    for (size_t try_no = 0; try_no < num_tries; ++try_no)
    {
        try
        {
            /// getRef() lazily (re)establishes the connection.
            pqxx::nontransaction tx(getRef());
            exec(tx);
            break;
        }
        catch (const pqxx::broken_connection & e)
        {
            LOG_DEBUG(log, "Cannot execute query due to connection failure, attempt: {}/{}. (Message: {})",
                    try_no, num_tries, e.what());

            /// Fix of an off-by-one: try_no only reaches num_tries - 1 inside
            /// the loop, so the old check `try_no == num_tries` never fired and
            /// the final failure was silently swallowed.
            if (try_no + 1 == num_tries)
                throw;
        }
    }
}
/// Return a reference to an open connection, establishing/refreshing it first.
pqxx::connection & Connection::getRef()
{
connect();
/// connect() either leaves a valid connection or throws, so this holds.
assert(connection != nullptr);
return *connection;
}
/// Best-effort reconnect: failures are logged, not propagated.
/// Note: only pqxx::broken_connection is swallowed; other exceptions escape.
void Connection::tryUpdateConnection()
{
try
{
updateConnection();
}
catch (const pqxx::broken_connection & e)
{
LOG_ERROR(log, "Unable to update connection: {}", e.what());
}
}
void Connection::updateConnection()
{
if (connection)
connection->close();
/// Always throws if there is no connection.
connection = std::make_unique<pqxx::connection>(connection_info.first);
if (replication)
connection->set_variable("default_transaction_isolation", "'repeatable read'");
LOG_DEBUG(&Poco::Logger::get("PostgreSQLConnection"), "New connection to {}", connection_info.second);
}
/// Ensure an open connection exists; reuse the current one when still open.
void Connection::connect()
{
    if (connection && connection->is_open())
        return;

    updateConnection();
}
}

View File

@ -0,0 +1,47 @@
#pragma once
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include <boost/noncopyable.hpp>
/* Methods to work with PostgreSQL connection object.
* Should only be used in case there has to be a single connection object, which
* is long-lived and there are no concurrent connection queries.
* Now only use case - for replication handler for replication from PostgreSQL.
* In all other integration engine use pool with failover.
**/
namespace Poco { class Logger; }
namespace postgres
{
using ConnectionInfo = std::pair<String, String>;
using ConnectionPtr = std::unique_ptr<pqxx::connection>;
class Connection : private boost::noncopyable
{
public:
Connection(const ConnectionInfo & connection_info_, bool replication_ = false, size_t num_tries = 3);
void execWithRetry(const std::function<void(pqxx::nontransaction &)> & exec);
pqxx::connection & getRef();
void connect();
void tryUpdateConnection();
const ConnectionInfo & getConnectionInfo() { return connection_info; }
private:
void updateConnection();
ConnectionPtr connection;
ConnectionInfo connection_info;
bool replication;
size_t num_tries;
Poco::Logger * log;
};
}

View File

@ -1,7 +1,7 @@
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include "PoolWithFailover.h"
#include "Utils.h"
#include <Common/parseRemoteDescription.h>
#include <Common/Exception.h>
#include <IO/Operators.h>
namespace DB
{
@ -14,18 +14,6 @@ namespace ErrorCodes
namespace postgres
{
String formatConnectionString(String dbname, String host, UInt16 port, String user, String password)
{
DB::WriteBufferFromOwnString out;
out << "dbname=" << DB::quote << dbname
<< " host=" << DB::quote << host
<< " port=" << port
<< " user=" << DB::quote << user
<< " password=" << DB::quote << password
<< " connect_timeout=10";
return out.str();
}
PoolWithFailover::PoolWithFailover(
const Poco::Util::AbstractConfiguration & config, const String & config_prefix,
size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_)
@ -58,14 +46,14 @@ PoolWithFailover::PoolWithFailover(
auto replica_user = config.getString(replica_name + ".user", user);
auto replica_password = config.getString(replica_name + ".password", password);
auto connection_string = formatConnectionString(db, replica_host, replica_port, replica_user, replica_password);
auto connection_string = formatConnectionString(db, replica_host, replica_port, replica_user, replica_password).first;
replicas_with_priority[priority].emplace_back(connection_string, pool_size);
}
}
}
else
{
auto connection_string = formatConnectionString(db, host, port, user, password);
auto connection_string = formatConnectionString(db, host, port, user, password).first;
replicas_with_priority[0].emplace_back(connection_string, pool_size);
}
}
@ -85,7 +73,7 @@ PoolWithFailover::PoolWithFailover(
for (const auto & [host, port] : addresses)
{
LOG_DEBUG(&Poco::Logger::get("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port);
auto connection_string = formatConnectionString(database, host, port, user, password);
auto connection_string = formatConnectionString(database, host, port, user, password).first;
replicas_with_priority[0].emplace_back(connection_string, pool_size);
}
}

View File

@ -1,16 +1,14 @@
#pragma once
#include "ConnectionHolder.h"
#include <mutex>
#include <Poco/Util/AbstractConfiguration.h>
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <common/logger_useful.h>
namespace postgres
{
String formatConnectionString(String dbname, String host, UInt16 port, String user, String password);
class PoolWithFailover
{

View File

@ -0,0 +1,19 @@
#include "Utils.h"
#include <IO/Operators.h>
namespace postgres
{

/// Build a libpq key=value connection string from the given credentials.
/// Returns a pair of (connection string, "host:port" description for logging).
ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password)
{
    DB::WriteBufferFromOwnString buf;
    buf << "dbname=" << DB::quote << dbname;
    buf << " host=" << DB::quote << host;
    buf << " port=" << port;
    buf << " user=" << DB::quote << user;
    buf << " password=" << DB::quote << password;
    buf << " connect_timeout=10";

    String description = host + ':' + DB::toString(port);
    return {buf.str(), description};
}

}

View File

@ -0,0 +1,17 @@
#pragma once
#include <pqxx/pqxx> // Y_IGNORE
#include <Core/Types.h>
#include "Connection.h"
#include <Common/Exception.h>
namespace pqxx
{
/// Convenience aliases for the pqxx transaction types used by the PostgreSQL integration.
using ReadTransaction = pqxx::read_transaction;
/// Repeatable-read, read-only transaction used on the replication path.
using ReplicationTransaction = pqxx::transaction<isolation_level::repeatable_read, write_policy::read_only>;
}
namespace postgres
{
/// Builds a libpq connection string; returns (connection string, "host:port" description).
ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password);
}

View File

@ -0,0 +1,241 @@
#include "insertPostgreSQLValue.h"
#if USE_LIBPQXX
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Common/assert_cast.h>
#include <pqxx/pqxx> // Y_IGNORE
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Append the column's default value (row 0 of the per-column sample) —
/// used when the source field is NULL or missing.
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
/// Parse one textual PostgreSQL field (`value`) and append it to `column`.
/// `type` selects the target ClickHouse column type; `data_type` supplies extra
/// metadata (time zone for DateTime, serialization for Decimals);
/// `array_info[idx]` holds the pre-computed element parser for array columns
/// (see preparePostgreSQLArrayInfo).
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx)
{
switch (type)
{
case ExternalResultDescription::ValueType::vtUInt8:
{
/// PostgreSQL renders booleans as 't'/'f' in text format; anything else is a numeric UInt8.
if (value == "t")
assert_cast<ColumnUInt8 &>(column).insertValue(1);
else if (value == "f")
assert_cast<ColumnUInt8 &>(column).insertValue(0);
else
assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
}
case ExternalResultDescription::ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ExternalResultDescription::ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
break;
case ExternalResultDescription::ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
break;
/// 8-bit ints are parsed via int16_t — presumably because pqxx would otherwise
/// treat int8_t as a character type; TODO confirm.
case ExternalResultDescription::ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
break;
case ExternalResultDescription::ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
break;
case ExternalResultDescription::ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
break;
case ExternalResultDescription::ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
/// Enums and FixedString are stored via the generic string path.
case ExternalResultDescription::ValueType::vtEnum8:[[fallthrough]];
case ExternalResultDescription::ValueType::vtEnum16:[[fallthrough]];
case ExternalResultDescription::ValueType::vtFixedString:[[fallthrough]];
case ExternalResultDescription::ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ExternalResultDescription::ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
case ExternalResultDescription::ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ExternalResultDescription::ValueType::vtDateTime:
{
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(data_type.get())->getTimeZone());
/// Clamp pre-epoch timestamps to 0: the DateTime column is unsigned.
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
/// DateTime64 and Decimals are parsed by the data type's own text serialization.
case ExternalResultDescription::ValueType::vtDateTime64:[[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal64: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal128: [[fallthrough]];
case ExternalResultDescription::ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ExternalResultDescription::ValueType::vtArray:
{
/// Walk the textual array representation event-by-event and rebuild nested Arrays.
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
/// dimensions[i] accumulates the elements of the currently open row at depth i (1-based).
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{
if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);
else if (parsed.first == pqxx::array_parser::juncture::string_value)
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{
max_dimension = std::max(max_dimension, dimension);
--dimension;
/// Outermost row closed: the whole array is complete.
if (dimension == 0)
break;
/// Fold the just-closed row into its parent as a nested Array.
dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
dimensions[dimension + 1].clear();
}
parsed = parser.get_next();
}
if (max_dimension < expected_dimensions)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions);
assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
break;
}
}
}
/// Pre-compute parsing metadata for array column `column_idx`: its nesting depth,
/// the per-element default value (used for SQL NULL) and a string-to-Field element
/// parser. The result is stored in array_info for insertPostgreSQLValue.
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();
/// Count nesting depth by unwrapping Array(...) layers down to the scalar type.
size_t count_dimensions = 1;
while (isArray(nested))
{
++count_dimensions;
nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
}
/// Default is taken before stripping Nullable, so NULL elements map to the type's default.
Field default_value = nested->getDefault();
if (nested->isNullable())
nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();
WhichDataType which(nested);
std::function<Field(std::string & fields)> parser;
if (which.isUInt8() || which.isUInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
else if (which.isInt8() || which.isInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
else if (which.isUInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
else if (which.isInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
else if (which.isUInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
else if (which.isInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
else if (which.isFloat32())
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())
parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
else if (which.isDateTime())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
return time;
};
/// Decimals: re-parse through a Decimal data type with matching precision/scale.
else if (which.isDecimal32())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal64())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal128())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal256())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
array_info[column_idx] = {count_dimensions, default_value, parser};
}
}
#endif

View File

@ -0,0 +1,38 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
namespace DB
{
/// Pre-computed parsing metadata for a single PostgreSQL array column
/// (filled by preparePostgreSQLArrayInfo, consumed by insertPostgreSQLValue).
struct PostgreSQLArrayInfo
{
/// Nesting depth of the array type, e.g. Array(Array(T)) -> 2.
size_t num_dimensions;
/// Value substituted for SQL NULL elements.
Field default_value;
/// Parses one textual element into a Field.
std::function<Field(std::string & field)> pqxx_parser;
};
/// Parse the textual field `value` and append it to `column` according to `type`.
void insertPostgreSQLValue(
IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type,
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t idx);
/// Fill array_info[column_idx] with dimension count, default value and element parser.
void preparePostgreSQLArrayInfo(
std::unordered_map<size_t, PostgreSQLArrayInfo> & array_info, size_t column_idx, const DataTypePtr data_type);
/// Append the column's default value (row 0 of sample_column) for NULL/missing fields.
void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column);
}
#endif

View File

@ -107,6 +107,8 @@ class IColumn;
M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \
M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \
M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \
M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \
M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \
M(UInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.", 0) \
M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
@ -430,6 +432,7 @@ class IColumn;
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \

View File

@ -22,12 +22,9 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
template<typename T>
PostgreSQLBlockInputStream<T>::PostgreSQLBlockInputStream(
postgres::ConnectionHolderPtr connection_holder_,
const std::string & query_str_,
const Block & sample_block,
@ -35,25 +32,52 @@ PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
: query_str(query_str_)
, max_block_size(max_block_size_)
, connection_holder(std::move(connection_holder_))
{
init(sample_block);
}
template<typename T>
PostgreSQLBlockInputStream<T>::PostgreSQLBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_,
bool auto_commit_)
: query_str(query_str_)
, tx(std::move(tx_))
, max_block_size(max_block_size_)
, auto_commit(auto_commit_)
{
init(sample_block);
}
template<typename T>
void PostgreSQLBlockInputStream<T>::init(const Block & sample_block)
{
description.init(sample_block);
for (const auto idx : collections::range(0, description.sample_block.columns()))
if (description.types[idx].first == ValueType::vtArray)
prepareArrayInfo(idx, description.sample_block.getByPosition(idx).type);
if (description.types[idx].first == ExternalResultDescription::ValueType::vtArray)
preparePostgreSQLArrayInfo(array_info, idx, description.sample_block.getByPosition(idx).type);
/// pqxx::stream_from uses COPY command, will get error if ';' is present
if (query_str.ends_with(';'))
query_str.resize(query_str.size() - 1);
}
void PostgreSQLBlockInputStream::readPrefix()
template<typename T>
void PostgreSQLBlockInputStream<T>::readPrefix()
{
tx = std::make_unique<pqxx::read_transaction>(connection_holder->get());
tx = std::make_shared<T>(connection_holder->get());
stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}
Block PostgreSQLBlockInputStream::readImpl()
template<typename T>
Block PostgreSQLBlockInputStream<T>::readImpl()
{
/// Check if pqxx::stream_from is finished
if (!stream || !(*stream))
@ -81,17 +105,22 @@ Block PostgreSQLBlockInputStream::readImpl()
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), (*row)[idx], description.types[idx].first, data_type.getNestedType(), idx);
insertPostgreSQLValue(
column_nullable.getNestedColumn(), (*row)[idx],
description.types[idx].first, data_type.getNestedType(), array_info, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
{
insertValue(*columns[idx], (*row)[idx], description.types[idx].first, sample.type, idx);
insertPostgreSQLValue(
*columns[idx], (*row)[idx], description.types[idx].first, sample.type, array_info, idx);
}
}
else
{
insertDefaultValue(*columns[idx], *sample.column);
insertDefaultPostgreSQLValue(*columns[idx], *sample.column);
}
}
@ -104,216 +133,23 @@ Block PostgreSQLBlockInputStream::readImpl()
}
void PostgreSQLBlockInputStream::readSuffix()
template<typename T>
void PostgreSQLBlockInputStream<T>::readSuffix()
{
if (stream)
{
stream->complete();
tx->commit();
if (auto_commit)
tx->commit();
}
}
template
class PostgreSQLBlockInputStream<pqxx::ReplicationTransaction>;
void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx)
{
switch (type)
{
case ValueType::vtUInt8:
{
if (value == "t")
assert_cast<ColumnUInt8 &>(column).insertValue(1);
else if (value == "f")
assert_cast<ColumnUInt8 &>(column).insertValue(0);
else
assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
}
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtEnum8:
case ValueType::vtEnum16:
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ValueType::vtUUID:
assert_cast<ColumnUUID &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ValueType::vtDateTime:
{
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ValueType::vtDateTime64:[[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ValueType::vtArray:
{
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
std::vector<Row> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{
if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);
else if (parsed.first == pqxx::array_parser::juncture::string_value)
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{
max_dimension = std::max(max_dimension, dimension);
--dimension;
if (dimension == 0)
break;
dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
dimensions[dimension + 1].clear();
}
parsed = parser.get_next();
}
if (max_dimension < expected_dimensions)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Got less dimensions than expected. ({} instead of {})", max_dimension, expected_dimensions);
assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
break;
}
}
}
void PostgreSQLBlockInputStream::prepareArrayInfo(size_t column_idx, const DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();
size_t count_dimensions = 1;
while (isArray(nested))
{
++count_dimensions;
nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
}
Field default_value = nested->getDefault();
if (nested->isNullable())
nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();
WhichDataType which(nested);
std::function<Field(std::string & fields)> parser;
if (which.isUInt8() || which.isUInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
else if (which.isInt8() || which.isInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
else if (which.isUInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
else if (which.isInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
else if (which.isUInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
else if (which.isInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
else if (which.isFloat32())
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())
parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
else if (which.isDateTime())
parser = [nested](std::string & field) -> Field
{
ReadBufferFromString in(field);
time_t time = 0;
readDateTimeText(time, in, assert_cast<const DataTypeDateTime *>(nested.get())->getTimeZone());
return time;
};
else if (which.isDecimal32())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal64())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal128())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal256())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
array_info[column_idx] = {count_dimensions, default_value, parser};
}
template
class PostgreSQLBlockInputStream<pqxx::ReadTransaction>;
}

View File

@ -9,54 +9,76 @@
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/insertPostgreSQLValue.h>
#include <Core/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/Utils.h>
namespace DB
{
template <typename T = pqxx::ReadTransaction>
class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
PostgreSQLBlockInputStream(
postgres::ConnectionHolderPtr connection_holder_,
const std::string & query_str,
const String & query_str_,
const Block & sample_block,
const UInt64 max_block_size_);
String getName() const override { return "PostgreSQL"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
using ValueType = ExternalResultDescription::ValueType;
void readPrefix() override;
protected:
PostgreSQLBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_,
bool auto_commit_);
String query_str;
std::shared_ptr<T> tx;
std::unique_ptr<pqxx::stream_from> stream;
private:
Block readImpl() override;
void readSuffix() override;
void insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx);
void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
void prepareArrayInfo(size_t column_idx, const DataTypePtr data_type);
void init(const Block & sample_block);
String query_str;
const UInt64 max_block_size;
bool auto_commit = true;
ExternalResultDescription description;
postgres::ConnectionHolderPtr connection_holder;
std::unique_ptr<pqxx::read_transaction> tx;
std::unique_ptr<pqxx::stream_from> stream;
struct ArrayInfo
std::unordered_map<size_t, PostgreSQLArrayInfo> array_info;
};
/// Passes transaction object into PostgreSQLBlockInputStream and does not close transaction after read is finished.
template <typename T>
class PostgreSQLTransactionBlockInputStream : public PostgreSQLBlockInputStream<T>
{
public:
using Base = PostgreSQLBlockInputStream<T>;
PostgreSQLTransactionBlockInputStream(
std::shared_ptr<T> tx_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_)
: PostgreSQLBlockInputStream<T>(tx_, query_str_, sample_block_, max_block_size_, false) {}
void readPrefix() override
{
size_t num_dimensions;
Field default_value;
std::function<Field(std::string & field)> pqxx_parser;
};
std::unordered_map<size_t, ArrayInfo> array_info;
Base::stream = std::make_unique<pqxx::stream_from>(*Base::tx, pqxx::from_query, std::string_view(Base::query_str));
}
};
}

View File

@ -33,7 +33,8 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm(
Aggregator::Params params(header, keys, aggregates,
false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, 0, 0,
settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set,
storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data);
storage_.getContext()->getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions, settings.min_count_to_compile_aggregate_expression);
aggregator = std::make_unique<Aggregator>(params);
}

View File

@ -357,6 +357,7 @@ inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_t
inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); }
inline bool isMap(const DataTypePtr & data_type) { return WhichDataType(data_type).isMap(); }
inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); }
inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); }
template <typename T>
inline bool isUInt8(const T & data_type)

View File

@ -61,6 +61,44 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDa
return nullptr;
}
/// Compile-time mapping from a ClickHouse numeric type parameter to the matching
/// LLVM IR type. Returns nullptr when the type has no native representation.
template <typename ToType>
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder)
{
    if constexpr (std::is_same_v<ToType, Float32>)
        return builder.getFloatTy();
    else if constexpr (std::is_same_v<ToType, Float64>)
        return builder.getDoubleTy();
    /// Signedness does not matter for the LLVM integer type, only the bit width.
    else if constexpr (std::is_same_v<ToType, Int8> || std::is_same_v<ToType, UInt8>)
        return builder.getInt8Ty();
    else if constexpr (std::is_same_v<ToType, Int16> || std::is_same_v<ToType, UInt16>)
        return builder.getInt16Ty();
    else if constexpr (std::is_same_v<ToType, Int32> || std::is_same_v<ToType, UInt32>)
        return builder.getInt32Ty();
    else if constexpr (std::is_same_v<ToType, Int64> || std::is_same_v<ToType, UInt64>)
        return builder.getInt64Ty();
    else
        return nullptr;
}
/// True iff `Type` is one of the numeric types that toNativeType() can map
/// to a native LLVM type (8/16/32/64-bit integers and 32/64-bit floats).
template <typename Type>
static inline bool canBeNativeType()
{
    return std::is_same_v<Type, Int8>    || std::is_same_v<Type, UInt8>
        || std::is_same_v<Type, Int16>   || std::is_same_v<Type, UInt16>
        || std::is_same_v<Type, Int32>   || std::is_same_v<Type, UInt32>
        || std::is_same_v<Type, Int64>   || std::is_same_v<Type, UInt64>
        || std::is_same_v<Type, Float32> || std::is_same_v<Type, Float64>;
}
static inline bool canBeNativeType(const IDataType & type)
{
WhichDataType data_type(type);
@ -79,40 +117,62 @@ static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const Dat
return toNativeType(builder, *type);
}
static inline llvm::Value * nativeBoolCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value)
static inline llvm::Value * nativeBoolCast(llvm::IRBuilder<> & b, const DataTypePtr & from_type, llvm::Value * value)
{
if (from->isNullable())
if (from_type->isNullable())
{
auto * inner = nativeBoolCast(b, removeNullable(from), b.CreateExtractValue(value, {0}));
auto * inner = nativeBoolCast(b, removeNullable(from_type), b.CreateExtractValue(value, {0}));
return b.CreateAnd(b.CreateNot(b.CreateExtractValue(value, {1})), inner);
}
auto * zero = llvm::Constant::getNullValue(value->getType());
if (value->getType()->isIntegerTy())
return b.CreateICmpNE(value, zero);
if (value->getType()->isFloatingPointTy())
return b.CreateFCmpONE(value, zero); /// QNaN is false
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast non-number {} to bool", from->getName());
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast non-number {} to bool", from_type->getName());
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, llvm::Type * to)
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, llvm::Type * to_type)
{
auto * n_from = value->getType();
auto * from_type = value->getType();
if (n_from == to)
if (from_type == to_type)
return value;
else if (n_from->isIntegerTy() && to->isFloatingPointTy())
return typeIsSigned(*from) ? b.CreateSIToFP(value, to) : b.CreateUIToFP(value, to);
else if (n_from->isFloatingPointTy() && to->isIntegerTy())
return typeIsSigned(*from) ? b.CreateFPToSI(value, to) : b.CreateFPToUI(value, to);
else if (n_from->isIntegerTy() && to->isIntegerTy())
return b.CreateIntCast(value, to, typeIsSigned(*from));
else if (n_from->isFloatingPointTy() && to->isFloatingPointTy())
return b.CreateFPCast(value, to);
else if (from_type->isIntegerTy() && to_type->isFloatingPointTy())
return typeIsSigned(*from) ? b.CreateSIToFP(value, to_type) : b.CreateUIToFP(value, to_type);
else if (from_type->isFloatingPointTy() && to_type->isIntegerTy())
return typeIsSigned(*from) ? b.CreateFPToSI(value, to_type) : b.CreateFPToUI(value, to_type);
else if (from_type->isIntegerTy() && to_type->isIntegerTy())
return b.CreateIntCast(value, to_type, typeIsSigned(*from));
else if (from_type->isFloatingPointTy() && to_type->isFloatingPointTy())
return b.CreateFPCast(value, to_type);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast {} to requested type", from->getName());
}
/// Cast a native LLVM value to the requested LLVM type.
/// The signedness of the conversion is taken from the compile-time source type
/// FromType (via std::numeric_limits), which selects signed vs unsigned
/// int<->float instructions and sign- vs zero-extension for integer widening.
/// Throws NOT_IMPLEMENTED for combinations other than int/float <-> int/float.
template <typename FromType>
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, llvm::Value * value, llvm::Type * to_type)
{
    auto * from_type = value->getType();
    static constexpr bool from_type_is_signed = std::numeric_limits<FromType>::is_signed;
    if (from_type == to_type)
        return value;
    else if (from_type->isIntegerTy() && to_type->isFloatingPointTy())
        return from_type_is_signed ? b.CreateSIToFP(value, to_type) : b.CreateUIToFP(value, to_type);
    else if (from_type->isFloatingPointTy() && to_type->isIntegerTy())
        return from_type_is_signed ? b.CreateFPToSI(value, to_type) : b.CreateFPToUI(value, to_type);
    else if (from_type->isIntegerTy() && to_type->isIntegerTy())
        return b.CreateIntCast(value, to_type, from_type_is_signed);
    else if (from_type->isFloatingPointTy() && to_type->isFloatingPointTy())
        return b.CreateFPCast(value, to_type);
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast {} to requested type", TypeName<FromType>);
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, const DataTypePtr & to)
{
auto * n_to = toNativeType(b, to);
@ -139,6 +199,37 @@ static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr
return nativeCast(b, from, value, n_to);
}
/// Cast two native values to a common LLVM type suitable for a binary operation
/// and return the pair of casted values.
/// For two integers the common type is an integer wide enough for both, with one
/// extra bit added when exactly one side is signed (so the unsigned side still
/// fits after gaining a sign bit). Any combination involving a float falls back
/// to double.
static inline std::pair<llvm::Value *, llvm::Value *> nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs)
{
    llvm::Type * common;
    bool lhs_is_signed = typeIsSigned(*lhs_type);
    bool rhs_is_signed = typeIsSigned(*rhs_type);
    if (lhs->getType()->isIntegerTy() && rhs->getType()->isIntegerTy())
    {
        /// if one integer has a sign bit, make sure the other does as well. llvm generates optimal code
        /// (e.g. uses overflow flag on x86) for (word size + 1)-bit integer operations.
        size_t lhs_bit_width = lhs->getType()->getIntegerBitWidth() + (!lhs_is_signed && rhs_is_signed);
        size_t rhs_bit_width = rhs->getType()->getIntegerBitWidth() + (!rhs_is_signed && lhs_is_signed);
        size_t max_bit_width = std::max(lhs_bit_width, rhs_bit_width);
        common = b.getIntNTy(max_bit_width);
    }
    else
    {
        /// TODO: Check
        /// (double, float) or (double, int_N where N <= double's mantissa width) -> double
        common = b.getDoubleTy();
    }
    auto * cast_lhs_to_common = nativeCast(b, lhs_type, lhs, common);
    auto * cast_rhs_to_common = nativeCast(b, rhs_type, rhs, common);
    return std::make_pair(cast_lhs_to_common, cast_rhs_to_common);
}
static inline llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builder, const DataTypePtr & column_type, const IColumn & column, size_t index)
{
if (const auto * constant = typeid_cast<const ColumnConst *>(&column))

View File

@ -81,7 +81,7 @@ void SerializationMap::deserializeBinary(IColumn & column, ReadBuffer & istr) co
template <typename Writer>
void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const
void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num, bool quote_key, WriteBuffer & ostr, Writer && writer) const
{
const auto & column_map = assert_cast<const ColumnMap &>(column);
@ -97,7 +97,16 @@ void SerializationMap::serializeTextImpl(const IColumn & column, size_t row_num,
{
if (i != offset)
writeChar(',', ostr);
writer(key, nested_tuple.getColumn(0), i);
if (quote_key)
{
writeChar('"', ostr);
writer(key, nested_tuple.getColumn(0), i);
writeChar('"', ostr);
}
else
writer(key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
writer(value, nested_tuple.getColumn(1), i);
}
@ -161,7 +170,7 @@ void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr,
void SerializationMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, /*quote_key=*/ false, ostr,
[&](const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, ostr, settings);
@ -170,7 +179,6 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
@ -178,10 +186,13 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
});
}
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
/// We need to double-quote integer keys to produce valid JSON.
const auto & column_key = assert_cast<const ColumnMap &>(column).getNestedData().getColumn(0);
bool quote_key = !WhichDataType(column_key.getDataType()).isStringOrFixedString();
serializeTextImpl(column, row_num, quote_key, ostr,
[&](const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextJSON(subcolumn, pos, ostr, settings);

View File

@ -61,7 +61,7 @@ public:
private:
template <typename Writer>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
void serializeTextImpl(const IColumn & column, size_t row_num, bool quote_key, WriteBuffer & ostr, Writer && writer) const;
template <typename Reader>
void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;

View File

@ -109,12 +109,11 @@ StoragePtr DatabaseAtomic::detachTable(const String & name)
void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool no_delay)
{
if (auto * mv = dynamic_cast<StorageMaterializedView *>(tryGetTable(table_name, local_context).get()))
{
/// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread)
mv->dropInnerTable(no_delay, local_context);
}
auto storage = tryGetTable(table_name, local_context);
/// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread)
if (storage)
storage->dropInnerTableIfAny(no_delay, local_context);
String table_metadata_path = getObjectMetadataPath(table_name);
String table_metadata_path_drop;
@ -568,4 +567,3 @@ void DatabaseAtomic::checkDetachedTableNotInUse(const UUID & uuid)
}
}

View File

@ -36,7 +36,8 @@
#if USE_LIBPQXX
#include <Databases/PostgreSQL/DatabasePostgreSQL.h> // Y_IGNORE
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#endif
namespace fs = std::filesystem;
@ -99,14 +100,14 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
const UUID & uuid = create.uuid;
bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "Lazy" ||
engine_name == "Replicated" || engine_name == "PostgreSQL";
engine_name == "Replicated" || engine_name == "PostgreSQL" || engine_name == "MaterializedPostgreSQL";
if (engine_define->engine->arguments && !engine_may_have_arguments)
throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
bool has_unexpected_element = engine_define->engine->parameters || engine_define->partition_by ||
engine_define->primary_key || engine_define->order_by ||
engine_define->sample_by;
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated";
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
if (has_unexpected_element || (!may_have_settings && engine_define->settings))
throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -262,6 +263,41 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
return std::make_shared<DatabasePostgreSQL>(
context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
}
else if (engine_name == "MaterializedPostgreSQL")
{
const ASTFunction * engine = engine_define->engine;
if (!engine->arguments || engine->arguments->children.size() != 4)
{
throw Exception(
fmt::format("{} Database require host:port, database_name, username, password arguments ", engine_name),
ErrorCodes::BAD_ARGUMENTS);
}
ASTs & engine_args = engine->arguments->children;
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);
const auto & host_port = safeGetLiteralValue<String>(engine_args[0], engine_name);
const auto & postgres_database_name = safeGetLiteralValue<String>(engine_args[1], engine_name);
const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);
auto parsed_host_port = parseAddress(host_port, 5432);
auto connection_info = postgres::formatConnectionString(postgres_database_name, parsed_host_port.first, parsed_host_port.second, username, password);
auto postgresql_replica_settings = std::make_unique<MaterializedPostgreSQLSettings>();
if (engine_define->settings)
postgresql_replica_settings->loadFromQuery(*engine_define);
return std::make_shared<DatabaseMaterializedPostgreSQL>(
context, metadata_path, uuid, engine_define, create.attach,
database_name, postgres_database_name, connection_info,
std::move(postgresql_replica_settings));
}
#endif

View File

@ -0,0 +1,215 @@
#include <Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h>
#if USE_LIBPQXX
#include <Storages/PostgreSQL/StorageMaterializedPostgreSQL.h>
#include <Databases/PostgreSQL/fetchPostgreSQLTableStructure.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <Databases/DatabaseOrdinary.h>
#include <Databases/DatabaseAtomic.h>
#include <Storages/StoragePostgreSQL.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Common/escapeForFileName.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/File.h>
#include <Common/Macros.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
/// Database engine that keeps a local, continuously synchronized copy of a remote
/// PostgreSQL database. Storage for the replicated tables lives in the underlying
/// Atomic database; replication itself is started later in startSynchronization().
DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL(
    ContextPtr context_,
    const String & metadata_path_,
    UUID uuid_,
    const ASTStorage * database_engine_define_,
    bool is_attach_,
    const String & database_name_,
    const String & postgres_database_name,
    const postgres::ConnectionInfo & connection_info_,
    std::unique_ptr<MaterializedPostgreSQLSettings> settings_)
    : DatabaseAtomic(database_name_, metadata_path_, uuid_, "DatabaseMaterializedPostgreSQL (" + database_name_ + ")", context_)
    , database_engine_define(database_engine_define_->clone())
    , is_attach(is_attach_)
    , remote_database_name(postgres_database_name)
    , connection_info(connection_info_)
    , settings(std::move(settings_))
{
}
/// Create the replication handler, fetch the list of tables to replicate from the
/// remote database, wrap each (existing or future) nested table into a
/// StorageMaterializedPostgreSQL facade, and start the replication thread.
/// Throws LOGICAL_ERROR if the remote reports no tables to replicate.
void DatabaseMaterializedPostgreSQL::startSynchronization()
{
    replication_handler = std::make_unique<PostgreSQLReplicationHandler>(
        /* replication_identifier */database_name,
        remote_database_name,
        database_name,
        connection_info,
        getContext(),
        is_attach,
        settings->materialized_postgresql_max_block_size.value,
        settings->materialized_postgresql_allow_automatic_update,
        /* is_materialized_postgresql_database = */ true,
        settings->materialized_postgresql_tables_list.value);
    postgres::Connection connection(connection_info);
    NameSet tables_to_replicate;
    try
    {
        tables_to_replicate = replication_handler->fetchRequiredTables(connection);
    }
    catch (...)
    {
        /// Log before rethrowing so the failure is visible even if the caller swallows it.
        LOG_ERROR(log, "Unable to load replicated tables list");
        throw;
    }
    if (tables_to_replicate.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty list of tables to replicate");
    for (const auto & table_name : tables_to_replicate)
    {
        /// Check nested ReplacingMergeTree table.
        auto storage = DatabaseAtomic::tryGetTable(table_name, getContext());
        if (storage)
        {
            /// Nested table was already created and synchronized.
            storage = StorageMaterializedPostgreSQL::create(storage, getContext());
        }
        else
        {
            /// Nested table does not exist and will be created by replication thread.
            storage = StorageMaterializedPostgreSQL::create(StorageID(database_name, table_name), getContext());
        }
        /// Cache MaterializedPostgreSQL wrapper over nested table.
        materialized_tables[table_name] = storage;
        /// Let replication thread know, which tables it needs to keep in sync.
        replication_handler->addStorage(table_name, storage->as<StorageMaterializedPostgreSQL>());
    }
    LOG_TRACE(log, "Loaded {} tables. Starting synchronization", materialized_tables.size());
    replication_handler->startup();
}
/// Load local metadata as a regular Atomic database, then start replication.
/// If starting replication fails and force_attach is set (server startup /
/// forced ATTACH), the error is logged but not rethrown so the server can
/// still come up; otherwise it propagates.
void DatabaseMaterializedPostgreSQL::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach)
{
    DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach);
    try
    {
        startSynchronization();
    }
    catch (...)
    {
        tryLogCurrentException(log, "Cannot load nested database objects for PostgreSQL database engine.");
        if (!force_attach)
            throw;
    }
}
/// Resolve a table name either to the MaterializedPostgreSQL wrapper (for SELECT
/// queries) or to the nested ReplacingMergeTree table (internal queries), or to
/// nullptr if the table is not yet usable.
StoragePtr DatabaseMaterializedPostgreSQL::tryGetTable(const String & name, ContextPtr local_context) const
{
    /// In order to define which table access is needed - to MaterializedPostgreSQL table (only in case of SELECT queries) or
    /// to its nested ReplacingMergeTree table (in all other cases), the context of a query is modified.
    /// Also if materialized_tables set is empty - it means all access is done to ReplacingMergeTree tables - it is a case after
    /// replication_handler was shutdown.
    if (local_context->isInternalQuery() || materialized_tables.empty())
    {
        return DatabaseAtomic::tryGetTable(name, local_context);
    }
    /// Note: In select query we call MaterializedPostgreSQL table and it calls tryGetTable from its nested.
    /// So the only point, where synchronization is needed - access to MaterializedPostgreSQL table wrapper over nested table.
    std::lock_guard lock(tables_mutex);
    auto table = materialized_tables.find(name);
    /// Return wrapper over ReplacingMergeTree table. If table synchronization just started, table will not
    /// be accessible immediately. Table is considered to exist once its nested table was created.
    if (table != materialized_tables.end() && table->second->as <StorageMaterializedPostgreSQL>()->hasNested())
    {
        return table->second;
    }
    return StoragePtr{};
}
/// Only the replication machinery (running with an internal query context) may
/// create tables in this database; a user-issued CREATE TABLE is rejected.
void DatabaseMaterializedPostgreSQL::createTable(ContextPtr local_context, const String & table_name, const StoragePtr & table, const ASTPtr & query)
{
    /// Create table query can only be called from replication thread.
    if (local_context->isInternalQuery())
    {
        DatabaseAtomic::createTable(local_context, table_name, table, query);
        return;
    }
    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
        "Create table query allowed only for ReplacingMergeTree engine and from synchronization thread");
}
/// Stop replication first, then shut down the underlying Atomic database.
void DatabaseMaterializedPostgreSQL::shutdown()
{
    stopReplication();
    DatabaseAtomic::shutdown();
}
/// Shut down the replication handler (if it was ever started) and drop the cached
/// MaterializedPostgreSQL wrappers.
void DatabaseMaterializedPostgreSQL::stopReplication()
{
    if (replication_handler)
        replication_handler->shutdown();
    /// Clear wrappers over nested tables; afterwards tryGetTable (seeing an empty
    /// materialized_tables map) resolves names directly to the nested tables.
    materialized_tables.clear();
}
/// Drop a table: forwarded to the Atomic database under a nested-table context,
/// so the underlying ReplacingMergeTree table is the one dropped.
void DatabaseMaterializedPostgreSQL::dropTable(ContextPtr local_context, const String & table_name, bool no_delay)
{
    /// Modify context into nested_context and pass query to Atomic database.
    DatabaseAtomic::dropTable(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), table_name, no_delay);
}
/// Drop the whole database: finalize replication (shutdownFinal) before dropping
/// the underlying Atomic database under a nested-table context.
void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context)
{
    if (replication_handler)
        replication_handler->shutdownFinal();
    DatabaseAtomic::drop(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context));
}
/// Iterate over tables of the underlying Atomic database using a nested-table
/// context, so the iterator yields the nested tables.
DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator(
    ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name)
{
    /// Modify context into nested_context and pass query to Atomic database.
    return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name);
}
}
#endif

View File

@ -0,0 +1,79 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <Storages/PostgreSQL/PostgreSQLReplicationHandler.h>
#include <Storages/PostgreSQL/MaterializedPostgreSQLSettings.h>
#include <Databases/DatabasesCommon.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/ASTCreateQuery.h>
#include <Databases/IDatabase.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseAtomic.h>
namespace DB
{
class PostgreSQLConnection;
using PostgreSQLConnectionPtr = std::shared_ptr<PostgreSQLConnection>;
/// Database engine keeping a local materialized copy of a remote PostgreSQL
/// database. Built on top of DatabaseAtomic for local storage; synchronization
/// is driven by PostgreSQLReplicationHandler.
class DatabaseMaterializedPostgreSQL : public DatabaseAtomic
{
public:
    DatabaseMaterializedPostgreSQL(
        ContextPtr context_,
        const String & metadata_path_,
        UUID uuid_,
        const ASTStorage * database_engine_define_,
        bool is_attach_,
        const String & database_name_,
        const String & postgres_database_name,
        const postgres::ConnectionInfo & connection_info,
        std::unique_ptr<MaterializedPostgreSQLSettings> settings_);
    String getEngineName() const override { return "MaterializedPostgreSQL"; }
    String getMetadataPath() const override { return metadata_path; }
    void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override;
    DatabaseTablesIteratorPtr getTablesIterator(
        ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) override;
    StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
    void createTable(ContextPtr context, const String & name, const StoragePtr & table, const ASTPtr & query) override;
    void dropTable(ContextPtr local_context, const String & name, bool no_delay) override;
    void drop(ContextPtr local_context) override;
    /// Stop the replication handler and clear cached table wrappers.
    void stopReplication();
    void shutdown() override;
private:
    /// Create the replication handler and start synchronizing tables.
    void startSynchronization();
    ASTPtr database_engine_define;
    /// True when the database is being attached (ATTACH) rather than freshly created.
    bool is_attach;
    /// Name of the source database on the PostgreSQL side.
    String remote_database_name;
    postgres::ConnectionInfo connection_info;
    std::unique_ptr<MaterializedPostgreSQLSettings> settings;
    std::shared_ptr<PostgreSQLReplicationHandler> replication_handler;
    /// Table name -> MaterializedPostgreSQL wrapper over the nested table.
    std::map<std::string, StoragePtr> materialized_tables;
    /// Guards materialized_tables.
    mutable std::mutex tables_mutex;
};
}
#endif

View File

@ -40,14 +40,14 @@ DatabasePostgreSQL::DatabasePostgreSQL(
const ASTStorage * database_engine_define_,
const String & dbname_,
const String & postgres_dbname,
postgres::PoolWithFailoverPtr connection_pool_,
const bool cache_tables_)
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_)
: IDatabase(dbname_)
, WithContext(context_->getGlobalContext())
, metadata_path(metadata_path_)
, database_engine_define(database_engine_define_->clone())
, dbname(postgres_dbname)
, connection_pool(std::move(connection_pool_))
, pool(std::move(pool_))
, cache_tables(cache_tables_)
{
cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); });
@ -59,7 +59,8 @@ bool DatabasePostgreSQL::empty() const
{
std::lock_guard<std::mutex> lock(mutex);
auto tables_list = fetchTablesList();
auto connection_holder = pool->get();
auto tables_list = fetchPostgreSQLTablesList(connection_holder->get());
for (const auto & table_name : tables_list)
if (!detached_or_dropped.count(table_name))
@ -74,7 +75,8 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
std::lock_guard<std::mutex> lock(mutex);
Tables tables;
auto table_names = fetchTablesList();
auto connection_holder = pool->get();
auto table_names = fetchPostgreSQLTablesList(connection_holder->get());
for (const auto & table_name : table_names)
if (!detached_or_dropped.count(table_name))
@ -84,21 +86,6 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local
}
std::unordered_set<std::string> DatabasePostgreSQL::fetchTablesList() const
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
auto connection_holder = connection_pool->get();
pqxx::read_transaction tx(connection_holder->get());
for (auto table_name : tx.stream<std::string>(query))
tables.insert(std::get<0>(table_name));
return tables;
}
bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
{
if (table_name.find('\'') != std::string::npos
@ -108,7 +95,7 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
"PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name);
}
auto connection_holder = connection_pool->get();
auto connection_holder = pool->get();
pqxx::nontransaction tx(connection_holder->get());
try
@ -163,20 +150,15 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr
if (!table_checked && !checkPostgresTable(table_name))
return StoragePtr{};
auto use_nulls = local_context->getSettingsRef().external_table_functions_use_nulls;
auto columns = fetchPostgreSQLTableStructure(connection_pool->get(), doubleQuoteString(table_name), use_nulls);
auto connection_holder = pool->get();
auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), doubleQuoteString(table_name)).columns;
if (!columns)
return StoragePtr{};
auto storage = StoragePostgreSQL::create(
StorageID(database_name, table_name),
connection_pool,
table_name,
ColumnsDescription{*columns},
ConstraintsDescription{},
String{},
local_context);
StorageID(database_name, table_name), pool, table_name,
ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context);
if (cache_tables)
cached_tables[table_name] = storage;
@ -298,7 +280,8 @@ void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, bool
void DatabasePostgreSQL::removeOutdatedTables()
{
std::lock_guard<std::mutex> lock{mutex};
auto actual_tables = fetchTablesList();
auto connection_holder = pool->get();
auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get());
if (cache_tables)
{

View File

@ -9,7 +9,7 @@
#include <Databases/DatabasesCommon.h>
#include <Core/BackgroundSchedulePool.h>
#include <Parsers/ASTCreateQuery.h>
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <Core/PostgreSQL/PoolWithFailover.h>
namespace DB
@ -33,7 +33,7 @@ public:
const ASTStorage * database_engine_define,
const String & dbname_,
const String & postgres_dbname,
postgres::PoolWithFailoverPtr connection_pool_,
postgres::PoolWithFailoverPtr pool_,
bool cache_tables_);
String getEngineName() const override { return "PostgreSQL"; }
@ -70,7 +70,7 @@ private:
String metadata_path;
ASTPtr database_engine_define;
String dbname;
postgres::PoolWithFailoverPtr connection_pool;
postgres::PoolWithFailoverPtr pool;
const bool cache_tables;
mutable Tables cached_tables;
@ -78,9 +78,11 @@ private:
BackgroundSchedulePool::TaskHolder cleaner_task;
bool checkPostgresTable(const String & table_name) const;
std::unordered_set<std::string> fetchTablesList() const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, bool table_checked) const;
StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const;
void removeOutdatedTables();
ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;
};

View File

@ -12,7 +12,8 @@
#include <DataTypes/DataTypeDateTime.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <pqxx/pqxx>
#include <Common/quoteString.h>
#include <Core/PostgreSQL/Utils.h>
namespace DB
@ -25,7 +26,21 @@ namespace ErrorCodes
}
static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, uint16_t dimensions, const std::function<void()> & recheck_array)
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx)
{
std::unordered_set<std::string> tables;
std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
"WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
for (auto table_name : tx.template stream<std::string>(query))
tables.insert(std::get<0>(table_name));
return tables;
}
static DataTypePtr convertPostgreSQLDataType(String & type, const std::function<void()> & recheck_array, bool is_nullable = false, uint16_t dimensions = 0)
{
DataTypePtr res;
bool is_array = false;
@ -116,52 +131,51 @@ static DataTypePtr convertPostgreSQLDataType(String & type, bool is_nullable, ui
}
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
postgres::ConnectionHolderPtr connection_holder, const String & postgres_table_name, bool use_nulls)
template<typename T>
std::shared_ptr<NamesAndTypesList> readNamesAndTypesList(
T & tx, const String & postgres_table_name, const String & query, bool use_nulls, bool only_names_and_types)
{
auto columns = NamesAndTypes();
if (postgres_table_name.find('\'') != std::string::npos
|| postgres_table_name.find('\\') != std::string::npos)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL table name cannot contain single quote or backslash characters, passed {}",
postgres_table_name);
}
std::string query = fmt::format(
"SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
"attnotnull AS not_null, attndims AS dims "
"FROM pg_attribute "
"WHERE attrelid = '{}'::regclass "
"AND NOT attisdropped AND attnum > 0", postgres_table_name);
try
{
std::set<size_t> recheck_arrays_indexes;
{
pqxx::read_transaction tx(connection_holder->get());
auto stream{pqxx::stream_from::query(tx, query)};
std::tuple<std::string, std::string, std::string, uint16_t> row;
size_t i = 0;
auto recheck_array = [&]() { recheck_arrays_indexes.insert(i); };
while (stream >> row)
if (only_names_and_types)
{
auto data_type = convertPostgreSQLDataType(std::get<1>(row),
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row),
recheck_array);
columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
++i;
std::tuple<std::string, std::string> row;
while (stream >> row)
{
columns.push_back(NameAndTypePair(std::get<0>(row), convertPostgreSQLDataType(std::get<1>(row), recheck_array)));
++i;
}
}
else
{
std::tuple<std::string, std::string, std::string, uint16_t> row;
while (stream >> row)
{
auto data_type = convertPostgreSQLDataType(std::get<1>(row),
recheck_array,
use_nulls && (std::get<2>(row) == "f"), /// 'f' means that postgres `not_null` is false, i.e. value is nullable
std::get<3>(row));
columns.push_back(NameAndTypePair(std::get<0>(row), data_type));
++i;
}
}
stream.complete();
tx.commit();
}
for (const auto & i : recheck_arrays_indexes)
{
const auto & name_and_type = columns[i];
pqxx::nontransaction tx(connection_holder->get());
/// All rows must contain the same number of dimensions, so limit 1 is ok. If number of dimensions in all rows is not the same -
/// such arrays are not able to be used as ClickHouse Array at all.
pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table_name))};
@ -178,9 +192,7 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
catch (const pqxx::undefined_table &)
{
throw Exception(fmt::format(
"PostgreSQL table {}.{} does not exist",
connection_holder->get().dbname(), postgres_table_name), ErrorCodes::UNKNOWN_TABLE);
throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", postgres_table_name);
}
catch (Exception & e)
{
@ -188,12 +200,101 @@ std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
throw;
}
if (columns.empty())
return nullptr;
return std::make_shared<NamesAndTypesList>(NamesAndTypesList(columns.begin(), columns.end()));
return !columns.empty() ? std::make_shared<NamesAndTypesList>(columns.begin(), columns.end()) : nullptr;
}
/// Fetch the structure of a PostgreSQL table: its column list and, optionally,
/// its primary-key columns and — only when no primary key exists — the columns
/// of its replica identity index. Queries are run inside the caller-provided
/// transaction tx.
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
    T & tx, const String & postgres_table_name, bool use_nulls, bool with_primary_key, bool with_replica_identity_index)
{
    PostgreSQLTableStructure table;
    /// All live (not dropped), user-visible (attnum > 0) attributes of the table.
    std::string query = fmt::format(
        "SELECT attname AS name, format_type(atttypid, atttypmod) AS type, "
        "attnotnull AS not_null, attndims AS dims "
        "FROM pg_attribute "
        "WHERE attrelid = {}::regclass "
        "AND NOT attisdropped AND attnum > 0", quoteString(postgres_table_name));
    table.columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, false);
    if (with_primary_key)
    {
        /// wiki.postgresql.org/wiki/Retrieve_primary_key_columns
        query = fmt::format(
            "SELECT a.attname, format_type(a.atttypid, a.atttypmod) AS data_type "
            "FROM pg_index i "
            "JOIN pg_attribute a ON a.attrelid = i.indrelid "
            "AND a.attnum = ANY(i.indkey) "
            "WHERE i.indrelid = {}::regclass AND i.indisprimary", quoteString(postgres_table_name));
        table.primary_key_columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, true);
    }
    if (with_replica_identity_index && !table.primary_key_columns)
    {
        query = fmt::format(
            "SELECT "
            "a.attname AS column_name, " /// column name
            "format_type(a.atttypid, a.atttypmod) as type " /// column type
            "FROM "
            "pg_class t, "
            "pg_class i, "
            "pg_index ix, "
            "pg_attribute a "
            "WHERE "
            "t.oid = ix.indrelid "
            "and i.oid = ix.indexrelid "
            "and a.attrelid = t.oid "
            "and a.attnum = ANY(ix.indkey) "
            "and t.relkind = 'r' " /// simple tables
            "and t.relname = {} " /// Connection is already done to a needed database, only table name is needed.
            "and ix.indisreplident = 't' " /// index is a replica identity index
            "ORDER BY a.attname", /// column names
            quoteString(postgres_table_name));
        table.replica_identity_columns = readNamesAndTypesList(tx, postgres_table_name, query, use_nulls, true);
    }
    return table;
}
PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connection, const String & postgres_table_name, bool use_nulls)
{
pqxx::ReadTransaction tx(connection);
auto result = fetchPostgreSQLTableStructure(tx, postgres_table_name, use_nulls, false, false);
tx.commit();
return result;
}
/// Convenience overload: lists table names of the connected database inside a
/// read-only transaction that is committed before returning.
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection)
{
    pqxx::ReadTransaction tx(connection);
    auto result = fetchPostgreSQLTablesList(tx);
    tx.commit();
    return result;
}
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReadTransaction & tx, const String & postgres_table_name, bool use_nulls,
bool with_primary_key, bool with_replica_identity_index);
template
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::ReplicationTransaction & tx, const String & postgres_table_name, bool use_nulls,
bool with_primary_key, bool with_replica_identity_index);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx);
template
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx);
}
#endif

View File

@ -5,15 +5,34 @@
#endif
#if USE_LIBPQXX
#include <Storages/PostgreSQL/ConnectionHolder.h>
#include <Core/PostgreSQL/ConnectionHolder.h>
#include <Core/NamesAndTypes.h>
namespace DB
{
std::shared_ptr<NamesAndTypesList> fetchPostgreSQLTableStructure(
postgres::ConnectionHolderPtr connection_holder, const String & postgres_table_name, bool use_nulls);
struct PostgreSQLTableStructure
{
std::shared_ptr<NamesAndTypesList> columns = nullptr;
std::shared_ptr<NamesAndTypesList> primary_key_columns = nullptr;
std::shared_ptr<NamesAndTypesList> replica_identity_columns = nullptr;
};
using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;
std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection);
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true);
template<typename T>
PostgreSQLTableStructure fetchPostgreSQLTableStructure(
T & tx, const String & postgres_table_name, bool use_nulls = true,
bool with_primary_key = false, bool with_replica_identity_index = false);
template<typename T>
std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx);
}

View File

@ -107,9 +107,10 @@ BlockInputStreamPtr PostgreSQLDictionarySource::loadKeys(const Columns & key_col
BlockInputStreamPtr PostgreSQLDictionarySource::loadBase(const String & query)
{
return std::make_shared<PostgreSQLBlockInputStream>(pool->get(), query, sample_block, max_block_size);
return std::make_shared<PostgreSQLBlockInputStream<>>(pool->get(), query, sample_block, max_block_size);
}
bool PostgreSQLDictionarySource::isModified() const
{
if (!configuration.invalidate_query.empty())
@ -128,7 +129,7 @@ std::string PostgreSQLDictionarySource::doInvalidateQuery(const std::string & re
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
PostgreSQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, 1);
PostgreSQLBlockInputStream<> block_input_stream(pool->get(), request, invalidate_sample_block, 1);
return readInvalidateQuery(block_input_stream);
}

View File

@ -11,8 +11,7 @@
#include <Core/Block.h>
#include <common/LocalDateTime.h>
#include <common/logger_useful.h>
#include <Storages/PostgreSQL/PoolWithFailover.h>
#include <pqxx/pqxx>
#include <Core/PostgreSQL/PoolWithFailover.h>
namespace DB

View File

@ -116,6 +116,8 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url)
add_subdirectory(array)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array)
add_subdirectory(JSONPath)
if (USE_STATS)
target_link_libraries(clickhouse_functions PRIVATE stats)
endif()

View File

@ -110,6 +110,11 @@ struct CRCFunctionWrapper
throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
[[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray<ReturnType> & /*res*/)
{
throw Exception("Cannot apply function " + std::string(Impl::name) + " to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static ReturnType doCRC(const ColumnString::Chars & buf, size_t offset, size_t size)
{

View File

@ -39,6 +39,8 @@ struct DummyJSONParser
std::string_view getString() const { return {}; }
Array getArray() const { return {}; }
Object getObject() const { return {}; }
Element getElement() { return {}; }
};
/// References an array in a JSON document.
@ -97,4 +99,9 @@ struct DummyJSONParser
#endif
};
inline ALWAYS_INLINE std::ostream& operator<<(std::ostream& out, DummyJSONParser::Element)
{
return out;
}
}

View File

@ -54,6 +54,12 @@ struct EmptyImpl
prev_offset = offsets[i];
}
}
/// empty()/notEmpty() for a UUID column: a UUID is "empty" iff its 128-bit value is zero.
/// @param container per-row UUID values
/// @param n         number of rows to process
/// @param res       output flags, one per row (xor-ed with `negative` to implement notEmpty)
static void uuid(const ColumnUUID::Container & container, size_t n, PaddedPODArray<UInt8> & res)
{
    for (size_t i = 0; i < n; ++i)
        /// Fix: check row i, not container.data() (element 0), which made every
        /// row's result depend solely on the first UUID in the column.
        res[i] = negative ^ (container[i].toUnderType() == 0);
}
};
}

View File

@ -0,0 +1,15 @@
#include <Functions/FunctionSQLJSON.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
/// Registers the SQL/JSON standard functions (JSON_EXISTS, JSON_QUERY, JSON_VALUE)
/// in the function factory.
void registerFunctionsSQLJSON(FunctionFactory & factory)
{
    factory.registerFunction<FunctionSQLJSON<NameJSONExists, JSONExistsImpl>>();
    factory.registerFunction<FunctionSQLJSON<NameJSONQuery, JSONQueryImpl>>();
    factory.registerFunction<FunctionSQLJSON<NameJSONValue, JSONValueImpl>>();
}
}

View File

@ -0,0 +1,334 @@
#pragma once
#include <sstream>
#include <type_traits>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/DummyJSONParser.h>
#include <Functions/IFunction.h>
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/Generator/GeneratorJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Functions/RapidJSONParser.h>
#include <Functions/SimdJSONParser.h>
#include <Interpreters/Context.h>
#include <Parsers/IParser.h>
#include <Parsers/Lexer.h>
#include <common/range.h>
#if !defined(ARCADIA_BUILD)
# include "config_functions.h"
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int BAD_ARGUMENTS;
}
/// Shared execution logic for the SQL/JSON functions (JSON_EXISTS / JSON_QUERY / JSON_VALUE).
class FunctionSQLJSONHelpers
{
public:
    template <typename Name, template <typename> typename Impl, class JSONParser>
    class Executor
    {
    public:
        /// Parses the constant JSONPath (argument 0) once, then parses the JSON of every
        /// row (argument 1) with JSONParser and lets Impl insert the per-row result into
        /// the result column. Rows whose JSON fails to parse, or for which Impl produces
        /// nothing, get the result type's default value.
        static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth)
        {
            MutableColumnPtr to{result_type->createColumn()};
            to->reserve(input_rows_count);

            if (arguments.size() < 2)
            {
                throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
            }

            const auto & first_column = arguments[0];

            /// Check 1 argument: must be of type String (JSONPath)
            if (!isString(first_column.type))
            {
                throw Exception(
                    "JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            /// Check 1 argument: must be const (JSONPath)
            if (!isColumnConst(*first_column.column))
            {
                throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            const auto & second_column = arguments[1];

            /// Check 2 argument: must be of type String (JSON)
            if (!isString(second_column.type))
            {
                throw Exception(
                    "JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }

            const ColumnPtr & arg_jsonpath = first_column.column;
            const auto * arg_jsonpath_const = typeid_cast<const ColumnConst *>(arg_jsonpath.get());
            const auto * arg_jsonpath_string = typeid_cast<const ColumnString *>(arg_jsonpath_const->getDataColumnPtr().get());

            /// The JSON argument may be either a const column or a full one.
            const ColumnPtr & arg_json = second_column.column;
            const auto * col_json_const = typeid_cast<const ColumnConst *>(arg_json.get());
            const auto * col_json_string
                = typeid_cast<const ColumnString *>(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get());

            /// Get data and offsets for 1 argument (JSONPath)
            const ColumnString::Chars & chars_path = arg_jsonpath_string->getChars();
            const ColumnString::Offsets & offsets_path = arg_jsonpath_string->getOffsets();

            /// Prepare to parse 1 argument (JSONPath).
            /// -1 excludes the terminating zero byte ColumnString stores after each value.
            const char * query_begin = reinterpret_cast<const char *>(&chars_path[0]);
            const char * query_end = query_begin + offsets_path[0] - 1;

            /// Tokenize query
            Tokens tokens(query_begin, query_end);
            /// Max depth 0 indicates that depth is not limited
            IParser::Pos token_iterator(tokens, parse_depth);

            /// Parse query and create AST tree
            Expected expected;
            ASTPtr res;
            ParserJSONPath parser;
            const bool parse_res = parser.parse(token_iterator, res, expected);
            if (!parse_res)
            {
                throw Exception{"Unable to parse JSONPath", ErrorCodes::BAD_ARGUMENTS};
            }

            /// Get data and offsets for 2 argument (JSON)
            const ColumnString::Chars & chars_json = col_json_string->getChars();
            const ColumnString::Offsets & offsets_json = col_json_string->getOffsets();

            JSONParser json_parser;
            using Element = typename JSONParser::Element;
            Element document;
            bool document_ok = false;

            /// Parse JSON for every row
            Impl<JSONParser> impl;
            for (const auto i : collections::range(0, input_rows_count))
            {
                /// Relies on offsets[-1] == 0 (ColumnString/PaddedPODArray padding
                /// convention), so the i == 0 case needs no special handling.
                std::string_view json{
                    reinterpret_cast<const char *>(&chars_json[offsets_json[i - 1]]), offsets_json[i] - offsets_json[i - 1] - 1};
                document_ok = json_parser.parse(json, document);

                bool added_to_column = false;
                if (document_ok)
                {
                    added_to_column = impl.insertResultToColumn(*to, document, res);
                }
                if (!added_to_column)
                {
                    /// Invalid JSON or no match: emit the type's default value.
                    to->insertDefault();
                }
            }
            return to;
        }
    };
};
/// IFunction adapter for one SQL/JSON function. Name supplies the SQL-visible
/// name, Impl the per-row extraction logic (JSONExistsImpl / JSONValueImpl /
/// JSONQueryImpl). The parser backend (simdjson vs dummy) is chosen at run time.
template <typename Name, template <typename> typename Impl>
class FunctionSQLJSON : public IFunction, WithConstContext
{
public:
    static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionSQLJSON>(context_); }
    explicit FunctionSQLJSON(ContextPtr context_) : WithConstContext(context_) { }

    static constexpr auto name = Name::name;
    String getName() const override { return Name::name; }
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }
    bool useDefaultImplementationForConstants() const override { return true; }
    /// Argument 0 is the JSONPath and must be constant (checked again in the executor).
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }

    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        /// The return type does not depend on the chosen parser, so the dummy one suffices.
        return Impl<DummyJSONParser>::getReturnType(Name::name, arguments);
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        /// Choose JSONParser.
        /// 1. Lexer(path) -> Tokens
        /// 2. Create ASTPtr
        /// 3. Parser(Tokens, ASTPtr) -> complete AST
        /// 4. Execute functions: call getNextItem on generator and handle each item
        uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth;
#if USE_SIMDJSON
        if (getContext()->getSettingsRef().allow_simdjson)
            return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
#endif
        return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth);
    }
};
/// SQL-visible names of the SQL/JSON functions (used as the Name template argument).
struct NameJSONExists
{
    static constexpr auto name{"JSON_EXISTS"};
};

struct NameJSONValue
{
    static constexpr auto name{"JSON_VALUE"};
};

struct NameJSONQuery
{
    static constexpr auto name{"JSON_QUERY"};
};
template <typename JSONParser>
class JSONExistsImpl
{
public:
using Element = typename JSONParser::Element;
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
{
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
Element current_element = root;
VisitorStatus status;
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
{
if (status == VisitorStatus::Ok)
{
break;
}
current_element = root;
}
/// insert result, status can be either Ok (if we found the item)
/// or Exhausted (if we never found the item)
ColumnUInt8 & col_bool = assert_cast<ColumnUInt8 &>(dest);
if (status == VisitorStatus::Ok)
{
col_bool.insert(1);
}
else
{
col_bool.insert(0);
}
return true;
}
};
/// Implementation of JSON_VALUE: returns the first *scalar* item (not an array
/// or object) matched by the JSONPath, serialized as a string. If no scalar
/// matches, returns false so the caller inserts the default value.
template <typename JSONParser>
class JSONValueImpl
{
public:
    using Element = typename JSONParser::Element;

    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }

    static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
    {
        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
        Element current_element = root;
        VisitorStatus status;
        Element res; /// NOTE(review): unused — candidate for removal.
        while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
        {
            if (status == VisitorStatus::Ok)
            {
                /// JSON_VALUE returns scalars only: stop at the first non-array,
                /// non-object match, otherwise keep searching.
                if (!(current_element.isArray() || current_element.isObject()))
                {
                    break;
                }
            }
            else if (status == VisitorStatus::Error)
            {
                /// ON ERROR
                /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
                /// however this functionality is not implemented yet
            }
            current_element = root; /// the generator mutates its argument; reset to root
        }

        /// Exhausted means no scalar was ever found.
        if (status == VisitorStatus::Exhausted)
        {
            return false;
        }

        std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        out << current_element.getElement();
        auto output_str = out.str();
        ColumnString & col_str = assert_cast<ColumnString &>(dest);
        col_str.insertData(output_str.data(), output_str.size());
        return true;
    }
};
/**
 * Implementation of JSON_QUERY: collects every item matched by the JSONPath and
 * returns them serialized as a JSON array string "[res1, res2, ...]". If nothing
 * matches, returns false so the caller inserts the default value.
 * @tparam JSONParser parser backend (simdjson / rapidjson / dummy)
 */
template <typename JSONParser>
class JSONQueryImpl
{
public:
    using Element = typename JSONParser::Element;

    static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeString>(); }

    static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }

    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr)
    {
        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
        Element current_element = root;
        VisitorStatus status;
        std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
        /// Create json array of results: [res1, res2, ...]
        out << "[";
        bool success = false;
        while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
        {
            if (status == VisitorStatus::Ok)
            {
                if (success)
                {
                    out << ", ";
                }
                success = true;
                out << current_element.getElement();
            }
            else if (status == VisitorStatus::Error)
            {
                /// ON ERROR
                /// Here it is possible to handle errors with ON ERROR (as described in ISO/IEC TR 19075-6),
                /// however this functionality is not implemented yet
            }
            current_element = root; /// the generator mutates its argument; reset to root
        }
        out << "]";
        if (!success)
        {
            return false;
        }
        ColumnString & col_str = assert_cast<ColumnString &>(dest);
        auto output_str = out.str();
        col_str.insertData(output_str.data(), output_str.size());
        return true;
    }
};
}

View File

@ -8,6 +8,7 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnsNumber.h>
namespace DB
@ -43,7 +44,9 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0])
&& !isArray(arguments[0]) && !isMap(arguments[0]))
&& !isArray(arguments[0])
&& !isMap(arguments[0])
&& !isUUID(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeNumber<ResultType>>();
@ -51,7 +54,7 @@ public:
bool useDefaultImplementationForConstants() const override { return true; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{
const ColumnPtr column = arguments[0].column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
@ -104,6 +107,14 @@ public:
Impl::array(col_nested.getOffsets(), vec_res);
return col_res;
}
else if (const ColumnUUID * col_uuid = checkAndGetColumn<ColumnUUID>(column.get()))
{
auto col_res = ColumnVector<ResultType>::create();
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
vec_res.resize(col_uuid->size());
Impl::uuid(col_uuid->getData(), input_rows_count, vec_res);
return col_res;
}
else
throw Exception("Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);

View File

@ -1265,23 +1265,7 @@ public:
assert(2 == types.size() && 2 == values.size());
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * x = values[0];
auto * y = values[1];
if (!types[0]->equals(*types[1]))
{
llvm::Type * common;
if (x->getType()->isIntegerTy() && y->getType()->isIntegerTy())
common = b.getIntNTy(std::max(
/// if one integer has a sign bit, make sure the other does as well. llvm generates optimal code
/// (e.g. uses overflow flag on x86) for (word size + 1)-bit integer operations.
x->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[0]) && typeIsSigned(*types[1])),
y->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[1]) && typeIsSigned(*types[0]))));
else
/// (double, float) or (double, int_N where N <= double's mantissa width) -> double
common = b.getDoubleTy();
x = nativeCast(b, types[0], x, common);
y = nativeCast(b, types[1], y, common);
}
auto [x, y] = nativeCastToCommon(b, types[0], values[0], types[1], values[1]);
auto * result = CompileOp<Op>::compile(b, x, y, typeIsSigned(*types[0]) || typeIsSigned(*types[1]));
return b.CreateSelect(result, b.getInt8(1), b.getInt8(0));
}

View File

@ -10,14 +10,20 @@
#include <Common/assert_cast.h>
#include <Core/Settings.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/Serializations/SerializationDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
@ -528,6 +534,7 @@ public:
}
};
template <typename JSONParser>
using JSONExtractInt8Impl = JSONExtractNumericImpl<JSONParser, Int8>;
template <typename JSONParser>
@ -625,6 +632,60 @@ struct JSONExtractTree
}
};
/// Extracts the value using the dictionary (inner) type's node, then inserts the
/// resulting bytes into the LowCardinality column via insertData.
class LowCardinalityNode : public Node
{
public:
    LowCardinalityNode(DataTypePtr dictionary_type_, std::unique_ptr<Node> impl_)
        : dictionary_type(dictionary_type_), impl(std::move(impl_)) {}

    bool insertResultToColumn(IColumn & dest, const Element & element) override
    {
        /// Extract into a scratch column of the dictionary type first.
        auto from_col = dictionary_type->createColumn();
        if (impl->insertResultToColumn(*from_col, element))
        {
            /// NOTE(review): getDataAt presumably yields a contiguous serialized value
            /// for every dictionary type used here — confirm for non-string types.
            StringRef value = from_col->getDataAt(0);
            assert_cast<ColumnLowCardinality &>(dest).insertData(value.data, value.size);
            return true;
        }
        return false;
    }

private:
    DataTypePtr dictionary_type;
    std::unique_ptr<Node> impl;
};
/// Parses a JSON string value into a UUID and inserts it into the destination column.
class UUIDNode : public Node
{
public:
    bool insertResultToColumn(IColumn & dest, const Element & element) override
    {
        /// Only string-typed JSON values can hold a UUID.
        if (!element.isString())
            return false;

        /// NOTE(review): parseFromString presumably throws on a malformed UUID instead
        /// of returning false — confirm this matches JSONExtract's error contract.
        auto uuid = parseFromString<UUID>(element.getString());
        assert_cast<ColumnUUID &>(dest).insert(uuid);
        return true;
    }
};
/// Converts a floating-point JSON number into a Decimal of the target scale.
template <typename DecimalType>
class DecimalNode : public Node
{
public:
    /// NOTE(review): single-argument constructor is implicit — consider `explicit`.
    DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {}

    bool insertResultToColumn(IColumn & dest, const Element & element) override
    {
        /// NOTE(review): only isDouble() values are accepted; whether integral JSON
        /// numbers (Int64/UInt64) should also convert needs confirming.
        if (!element.isDouble())
            return false;

        const auto * type = assert_cast<const DataTypeDecimal<DecimalType> *>(data_type.get());
        auto result = convertToDecimal<DataTypeNumber<Float64>, DataTypeDecimal<DecimalType>>(element.getDouble(), type->getScale());
        assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(result);
        return true;
    }

private:
    DataTypePtr data_type;
};
class StringNode : public Node
{
public:
@ -864,6 +925,17 @@ struct JSONExtractTree
case TypeIndex::Float64: return std::make_unique<NumericNode<Float64>>();
case TypeIndex::String: return std::make_unique<StringNode>();
case TypeIndex::FixedString: return std::make_unique<FixedStringNode>();
case TypeIndex::UUID: return std::make_unique<UUIDNode>();
case TypeIndex::LowCardinality:
{
auto dictionary_type = typeid_cast<const DataTypeLowCardinality *>(type.get())->getDictionaryType();
auto impl = build(function_name, dictionary_type);
return std::make_unique<LowCardinalityNode>(dictionary_type, std::move(impl));
}
case TypeIndex::Decimal256: return std::make_unique<DecimalNode<Decimal256>>(type);
case TypeIndex::Decimal128: return std::make_unique<DecimalNode<Decimal128>>(type);
case TypeIndex::Decimal64: return std::make_unique<DecimalNode<Decimal64>>(type);
case TypeIndex::Decimal32: return std::make_unique<DecimalNode<Decimal32>>(type);
case TypeIndex::Enum8:
return std::make_unique<EnumNode<Int8>>(static_cast<const DataTypeEnum8 &>(*type).getValues());
case TypeIndex::Enum16:

View File

@ -0,0 +1,18 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h>
#include <Parsers/IAST.h>
namespace DB
{
/// Root AST node for a complete JSONPath expression (e.g. $.a.b[0]).
class ASTJSONPath : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPath"; }

    /// NOTE(review): copy-construction copies the raw jsonpath_query pointer, so a
    /// clone still points into the original's child AST — confirm callers never
    /// rely on an independent deep copy.
    ASTPtr clone() const override { return std::make_shared<ASTJSONPath>(*this); }

    /// Non-owning pointer to the query node; presumably set by the JSONPath parser — verify.
    ASTJSONPathQuery * jsonpath_query;
};
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for a single ".member" step of a JSONPath.
class ASTJSONPathMemberAccess : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPathMemberAccess"; }
    ASTPtr clone() const override { return std::make_shared<ASTJSONPathMemberAccess>(*this); }

public:
    /// Member name to lookup in json document (in path: $.some_key.another_key. ...)
    String member_name;
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node holding the ordered list of JSONPath steps as its children.
class ASTJSONPathQuery : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPathQuery"; }
    ASTPtr clone() const override { return std::make_shared<ASTJSONPathQuery>(*this); }
};
}

View File

@ -0,0 +1,23 @@
#pragma once
#include <vector>
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for an array-subscript step with explicit indices/ranges, e.g. $[0, 4 to 9].
class ASTJSONPathRange : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPathRange"; }
    ASTPtr clone() const override { return std::make_shared<ASTJSONPathRange>(*this); }

public:
    /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9])
    /// Range is represented as <start, end (non-inclusive)>
    /// Single index is represented as <start, start + 1>
    std::vector<std::pair<UInt32, UInt32>> ranges;
    bool is_star = false; /// true when the subscript is $[*]
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for the "$" (document root) step of a JSONPath.
class ASTJSONPathRoot : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPathRoot"; }
    ASTPtr clone() const override { return std::make_shared<ASTJSONPathRoot>(*this); }
};
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
/// AST node for the "[*]" (all array elements) step of a JSONPath.
class ASTJSONPathStar : public IAST
{
public:
    String getID(char) const override { return "ASTJSONPathStar"; }
    ASTPtr clone() const override { return std::make_shared<ASTJSONPathStar>(*this); }
};
}

View File

@ -0,0 +1,13 @@
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")

# Collect the JSONPath parser/AST/generator sources into one library.
add_headers_and_sources(clickhouse_functions_jsonpath Parsers)
add_headers_and_sources(clickhouse_functions_jsonpath ASTs)
add_headers_and_sources(clickhouse_functions_jsonpath Generator)
add_library(clickhouse_functions_jsonpath ${clickhouse_functions_jsonpath_sources} ${clickhouse_functions_jsonpath_headers})
target_link_libraries(clickhouse_functions_jsonpath PRIVATE dbms)
target_link_libraries(clickhouse_functions_jsonpath PRIVATE clickhouse_parsers)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_jsonpath)

# Strip debug info to reduce binary size when the project-wide option is enabled.
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_jsonpath PRIVATE "-g0")
endif()

View File

@ -0,0 +1,128 @@
#pragma once
#include <Functions/JSONPath/Generator/IGenerator.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathRange.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathRoot.h>
#include <Functions/JSONPath/Generator/VisitorJSONPathStar.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
template <typename JSONParser>
class GeneratorJSONPath : public IGenerator<JSONParser>
{
public:
    /**
     * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors
     * @param query_ptr_ pointer to ASTJSONPathQuery
     */
    GeneratorJSONPath(ASTPtr query_ptr_)
    {
        query_ptr = query_ptr_;
        const auto * path = query_ptr->as<ASTJSONPath>();
        if (!path)
        {
            throw Exception("Invalid path", ErrorCodes::LOGICAL_ERROR);
        }
        const auto * query = path->jsonpath_query;

        /// One visitor per path step; children of unrecognized types are silently skipped.
        for (auto child_ast : query->children)
        {
            if (typeid_cast<ASTJSONPathRoot *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathRoot<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathMemberAccess *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathMemberAccess<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathRange *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathRange<JSONParser>>(child_ast));
            }
            else if (typeid_cast<ASTJSONPathStar *>(child_ast.get()))
            {
                visitors.push_back(std::make_shared<VisitorJSONPathStar<JSONParser>>(child_ast));
            }
        }
    }

    const char * getName() const override { return "GeneratorJSONPath"; }

    /**
     * This method exposes API of traversing all paths, described by JSONPath,
     * to SQLJSON Functions.
     * Expected usage is to iteratively call this method from inside the function
     * and to execute custom logic with received element or handle an error.
     * On each such call getNextItem will yield next item into element argument
     * and modify its internal state to prepare for next call.
     *
     * @param element root of JSON document
     * @return VisitorStatus::Ok (element now holds the matched item),
     *         VisitorStatus::Error (a path step failed on this document), or
     *         VisitorStatus::Exhausted (no more items; element is untouched)
     */
    VisitorStatus getNextItem(typename JSONParser::Element & element) override
    {
        while (true)
        {
            /// element passed to us actually is root, so here we assign current to root
            auto current = element;
            if (current_visitor < 0)
            {
                return VisitorStatus::Exhausted;
            }

            /// Replay the already-positioned prefix of the path without mutating visitor state.
            for (int i = 0; i < current_visitor; ++i)
            {
                visitors[i]->apply(current);
            }

            VisitorStatus status = VisitorStatus::Error;
            for (size_t i = current_visitor; i < visitors.size(); ++i)
            {
                status = visitors[i]->visit(current);
                current_visitor = i;
                if (status == VisitorStatus::Error || status == VisitorStatus::Ignore)
                {
                    break;
                }
            }
            updateVisitorsForNextRun();

            /// Ignore means "this branch produced nothing" — keep searching.
            if (status != VisitorStatus::Ignore)
            {
                element = current;
                return status;
            }
        }
    }

private:
    /// Pops exhausted visitors (reinitializing each) and advances the deepest
    /// remaining one. Returns whether any visitor remains; the return value is
    /// currently unused — getNextItem re-checks current_visitor itself.
    bool updateVisitorsForNextRun()
    {
        while (current_visitor >= 0 && visitors[current_visitor]->isExhausted())
        {
            visitors[current_visitor]->reinitialize();
            current_visitor--;
        }
        if (current_visitor >= 0)
        {
            visitors[current_visitor]->updateState();
        }
        return current_visitor >= 0;
    }

    /// Index of the deepest visitor to (re)visit next; -1 means fully exhausted.
    int current_visitor = 0;
    ASTPtr query_ptr;
    VisitorList<JSONParser> visitors;
};
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <Functions/JSONPath/Generator/IGenerator_fwd.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
#include <Parsers/IAST.h>
namespace DB
{
/// Interface of a JSONPath item generator (see GeneratorJSONPath).
template <typename JSONParser>
class IGenerator
{
public:
    IGenerator() = default;

    virtual const char * getName() const = 0;

    /**
     * Used to yield next non-ignored element described by JSONPath query.
     *
     * @param element to be extracted into
     * @return VisitorStatus::Ok with the matched element, VisitorStatus::Error
     *         on a non-ignorable failure, or VisitorStatus::Exhausted when no
     *         more items remain
     */
    virtual VisitorStatus getNextItem(typename JSONParser::Element & element) = 0;

    virtual ~IGenerator() = default;
};
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Functions/JSONPath/Generator/IVisitor.h>
namespace DB
{
template <typename JSONParser>
class IGenerator;
template <typename JSONParser>
using IVisitorPtr = std::shared_ptr<IVisitor<JSONParser>>;
template <typename JSONParser>
using VisitorList = std::vector<IVisitorPtr<JSONParser>>;
}

View File

@ -0,0 +1,46 @@
#pragma once
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Base class for one step of a JSONPath (root, member access, range, star).
/// A visitor is a small state machine: visit() advances it, apply() replays the
/// current step without changing state; reinitialize()/updateState() are driven
/// by the generator when backtracking between steps.
template <typename JSONParser>
class IVisitor
{
public:
    virtual const char * getName() const = 0;

    /**
     * Applies this visitor to document and mutates its state
     * @param element JSON element of the parser in use (simdjson/rapidjson/dummy)
     */
    virtual VisitorStatus visit(typename JSONParser::Element & element) = 0;

    /**
     * Applies this visitor to document, but does not mutate state
     * @param element JSON element of the parser in use (simdjson/rapidjson/dummy)
     */
    virtual VisitorStatus apply(typename JSONParser::Element & element) const = 0;

    /**
     * Restores visitor's initial state for later use
     */
    virtual void reinitialize() = 0;

    /// Advances internal position (e.g. next array index) before the next run.
    virtual void updateState() = 0;

    bool isExhausted() { return is_exhausted; }

    void setExhausted(bool exhausted) { is_exhausted = exhausted; }

    virtual ~IVisitor() = default;

private:
    /**
     * This variable is for detecting whether a visitor's next visit will be able
     * to yield a new item.
     */
    bool is_exhausted = false;
};
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for one ".member" step: descends into the value stored under member_name.
template <typename JSONParser>
class VisitorJSONPathMemberAccess : public IVisitor<JSONParser>
{
public:
    VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_)
        : member_access_ptr(member_access_ptr_->as<ASTJSONPathMemberAccess>()) { }

    const char * getName() const override { return "VisitorJSONPathMemberAccess"; }

    /// Replace element with the value stored under member_name. The generator only
    /// calls this after a successful visit(), so the lookup is expected to succeed;
    /// the find() result is intentionally unchecked here.
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Element result;
        element.getObject().find(std::string_view(member_access_ptr->member_name), result);
        element = result;
        return VisitorStatus::Ok;
    }

    /// A member access yields at most one element, so the visitor is exhausted
    /// after a single visit.
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        this->setExhausted(true);
        if (!element.isObject())
        {
            return VisitorStatus::Error;
        }
        typename JSONParser::Element result;
        if (!element.getObject().find(std::string_view(member_access_ptr->member_name), result))
        {
            return VisitorStatus::Error;
        }
        /// Reuse the lookup result instead of calling apply(), which would run the
        /// same find() a second time.
        element = result;
        return VisitorStatus::Ok;
    }

    void reinitialize() override { this->setExhausted(false); }

    void updateState() override { }

private:
    ASTJSONPathMemberAccess * member_access_ptr;
};
}

View File

@ -0,0 +1,80 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for an explicit list of array indices/ranges ($[0, 4 to 9]).
/// Iterates over (current_range, current_index); the generator advances the
/// position via updateState() between visits.
template <typename JSONParser>
class VisitorJSONPathRange : public IVisitor<JSONParser>
{
public:
    VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as<ASTJSONPathRange>())
    {
        current_range = 0;
        current_index = range_ptr->ranges[current_range].first;
    }

    const char * getName() const override { return "VisitorJSONPathRange"; }

    /// Replace element with element[current_index]. Callers guarantee element is an
    /// array and current_index is in bounds (checked by a prior visit()).
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Element result; /// NOTE(review): unused — candidate for removal.
        typename JSONParser::Array array = element.getArray();
        element = array[current_index];
        return VisitorStatus::Ok;
    }

    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        if (!element.isArray())
        {
            this->setExhausted(true);
            return VisitorStatus::Error;
        }

        VisitorStatus status;
        if (current_index < element.getArray().size())
        {
            apply(element);
            status = VisitorStatus::Ok;
        }
        else
        {
            /// The path asks for an index beyond this array's size: skip silently.
            status = VisitorStatus::Ignore;
        }

        /// Exhausted once positioned at the last index of the last range. The
        /// generator then reinitializes this visitor instead of calling
        /// updateState(), which keeps current_range in bounds there.
        if (current_index + 1 == range_ptr->ranges[current_range].second
            && current_range + 1 == range_ptr->ranges.size())
        {
            this->setExhausted(true);
        }

        return status;
    }

    void reinitialize() override
    {
        current_range = 0;
        current_index = range_ptr->ranges[current_range].first;
        this->setExhausted(false);
    }

    /// Step to the next index, hopping to the next range when the current one ends.
    void updateState() override
    {
        current_index++;
        if (current_index == range_ptr->ranges[current_range].second)
        {
            current_range++;
            current_index = range_ptr->ranges[current_range].first;
        }
    }

private:
    ASTJSONPathRange * range_ptr;
    size_t current_range;
    UInt32 current_index;
};
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for the JSONPath root accessor `$`.
/// The generator already passes the document's root element, so this visitor
/// is a no-op that matches exactly once per document.
template <typename JSONParser>
class VisitorJSONPathRoot : public IVisitor<JSONParser>
{
public:
    VisitorJSONPathRoot(ASTPtr) { }

    const char * getName() const override { return "VisitorJSONPathRoot"; }

    /// No-op on document, since we are already passed document's root.
    VisitorStatus apply(typename JSONParser::Element & /*element*/) const override { return VisitorStatus::Ok; }

    /// `$` produces a single element, so exhaust immediately after the match.
    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        auto status = apply(element);
        this->setExhausted(true);
        return status;
    }

    void reinitialize() override { this->setExhausted(false); }

    void updateState() override { }
};
}

View File

@ -0,0 +1,66 @@
#pragma once
#include <Functions/JSONPath/ASTs/ASTJSONPathStar.h>
#include <Functions/JSONPath/Generator/IVisitor.h>
#include <Functions/JSONPath/Generator/VisitorStatus.h>
namespace DB
{
/// Visitor for the JSONPath wildcard accessor `[*]`: iterates over every item
/// of a JSON array, producing one item per visit()/updateState() cycle.
template <typename JSONParser>
class VisitorJSONPathStar : public IVisitor<JSONParser>
{
public:
    VisitorJSONPathStar(ASTPtr)
    {
        current_index = 0;
    }

    const char * getName() const override { return "VisitorJSONPathStar"; }

    /// Replace `element` with the array item at current_index.
    /// Callers must ensure `element` is an array and current_index is in
    /// bounds — visit() performs both checks before delegating here.
    VisitorStatus apply(typename JSONParser::Element & element) const override
    {
        typename JSONParser::Array array = element.getArray();
        element = array[current_index];
        return VisitorStatus::Ok;
    }

    VisitorStatus visit(typename JSONParser::Element & element) override
    {
        if (!element.isArray())
        {
            this->setExhausted(true);
            return VisitorStatus::Error;
        }

        VisitorStatus status;
        if (current_index < element.getArray().size())
        {
            apply(element);
            status = VisitorStatus::Ok;
        }
        else
        {
            /// Walked past the end of the array: `*` has produced every item.
            status = VisitorStatus::Ignore;
            this->setExhausted(true);
        }
        return status;
    }

    void reinitialize() override
    {
        current_index = 0;
        this->setExhausted(false);
    }

    void updateState() override
    {
        current_index++;
    }

private:
    UInt32 current_index;
};
}

View File

@ -0,0 +1,13 @@
#pragma once
namespace DB
{
/// Result of one step of a JSONPath visitor over a JSON element.
enum VisitorStatus
{
    Ok,        /// The visitor matched and produced an element.
    Exhausted, /// The visitor has no more elements to produce.
    Error,     /// The current element cannot match (wrong type or missing key).
    Ignore     /// No match at this position, but other documents may still match.
};
}

View File

@ -0,0 +1,31 @@
#include <Functions/JSONPath/ASTs/ASTJSONPath.h>
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPath.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
namespace DB
{
/**
* Entry parser for JSONPath
*/
/// Entry point for JSONPath parsing: wraps the parsed ASTJSONPathQuery into
/// an ASTJSONPath node. The resulting node is published via `node` even when
/// parsing fails; only on success does it receive the query as a child.
bool ParserJSONPath::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    auto path_ast = std::make_shared<ASTJSONPath>();
    node = path_ast;

    ASTPtr query_ast;
    ParserJSONPathQuery query_parser;
    const bool parsed = query_parser.parse(pos, query_ast, expected);
    if (parsed)
        path_ast->set(path_ast->jsonpath_query, query_ast);

    return parsed;
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/**
* Entry parser for JSONPath
*/
class ParserJSONPath : public IParserBase
{
private:
    const char * getName() const override { return "ParserJSONPath"; }
    /// Parses a complete JSONPath expression ($ followed by accessors) into ASTJSONPath.
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
    explicit ParserJSONPath() = default;
};
}

View File

@ -0,0 +1,42 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/Lexer.h>
namespace DB
{
/**
*
* @param pos token iterator
* @param node node of ASTJSONPathMemberAccess
* @param expected stuff for logging
* @return was parse successful
*/
/// Parses a member access accessor of the form ".identifier" into
/// ASTJSONPathMemberAccess, storing the identifier as its member name.
bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    /// A member access must start with a dot followed by a bare word.
    if (pos->type != TokenType::Dot)
        return false;
    ++pos;

    if (pos->type != TokenType::BareWord)
        return false;

    ASTPtr member_name;
    ParserIdentifier identifier_parser;
    if (!identifier_parser.parse(pos, member_name, expected))
        return false;

    auto member_access = std::make_shared<ASTJSONPathMemberAccess>();
    node = member_access;
    return tryGetIdentifierNameInto(member_name, member_access->member_name);
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser for the ".member" accessor inside a JSONPath query.
class ParserJSONPathMemberAccess : public IParserBase
{
    const char * getName() const override { return "ParserJSONPathMemberAccess"; }
    /// Parses ".identifier" into ASTJSONPathMemberAccess.
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,48 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathStar.h>
namespace DB
{
/**
*
* @param pos token iterator
* @param query node of ASTJSONPathQuery
* @param expected stuff for logging
* @return was parse successful
*/
/// Parses a full JSONPath query: a mandatory root accessor ($) followed by
/// any sequence of member (.name), range ([a, b TO c]), or star ([*])
/// accessors, all appended as children of the resulting ASTJSONPathQuery.
bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expected)
{
    query = std::make_shared<ASTJSONPathQuery>();

    ParserJSONPathRoot root_parser;
    ParserJSONPathMemberAccess member_access_parser;
    ParserJSONPathRange range_parser;
    ParserJSONPathStar star_parser;

    /// Every JSONPath must begin at the document root.
    ASTPtr path_root;
    if (!root_parser.parse(pos, path_root, expected))
        return false;
    query->children.push_back(path_root);

    /// Consume accessors until none of the sub-parsers match.
    ASTPtr accessor;
    while (member_access_parser.parse(pos, accessor, expected)
           || range_parser.parse(pos, accessor, expected)
           || star_parser.parse(pos, accessor, expected))
    {
        if (accessor)
        {
            query->children.push_back(accessor);
            accessor = nullptr;
        }
    }

    /// parsing was successful if we reached the end of query by this point
    return pos->type == TokenType::EndOfStream;
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser for the body of a JSONPath query: root accessor plus a chain of accessors.
class ParserJSONPathQuery : public IParserBase
{
protected:
    const char * getName() const override { return "ParserJSONPathQuery"; }
    /// Parses "$" followed by member/range/star accessors into ASTJSONPathQuery.
    bool parseImpl(Pos & pos, ASTPtr & query, Expected & expected) override;
};
}

View File

@ -0,0 +1,94 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathRange.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathQuery.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRange.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/CommonParsers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/**
*
* @param pos token iterator
* @param node node of ASTJSONPathQuery
* @param expected stuff for logging
* @return was parse successful
*/
/// Parses a bracketed range accessor: "[n]", "[m TO n]", or a comma-separated
/// list of such entries. Each entry is stored as a half-open pair
/// [first, second) in the resulting ASTJSONPathRange.
/// Throws BAD_ARGUMENTS when a range's start is not strictly less than its end.
bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type != TokenType::OpeningSquareBracket)
    {
        return false;
    }
    ++pos;

    auto range = std::make_shared<ASTJSONPathRange>();
    node = range;

    ParserNumber number_p;
    ASTPtr number_ptr;
    while (pos->type != TokenType::ClosingSquareBracket)
    {
        if (pos->type != TokenType::Number)
        {
            return false;
        }

        std::pair<UInt32, UInt32> range_indices;
        if (!number_p.parse(pos, number_ptr, expected))
        {
            return false;
        }
        range_indices.first = number_ptr->as<ASTLiteral>()->value.get<UInt32>();

        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket)
        {
            /// Single index case: [n] is treated as the half-open range [n, n + 1).
            range_indices.second = range_indices.first + 1;
        }
        else if (pos->type == TokenType::BareWord)
        {
            /// Explicit range case: [m TO n].
            if (!ParserKeyword("TO").ignore(pos, expected))
            {
                return false;
            }
            if (!number_p.parse(pos, number_ptr, expected))
            {
                return false;
            }
            range_indices.second = number_ptr->as<ASTLiteral>()->value.get<UInt32>();
        }
        else
        {
            return false;
        }

        if (range_indices.first >= range_indices.second)
        {
            /// The check rejects first >= second, so the start must be
            /// strictly LESS than the end (the old message said "greater").
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Start of range must be less than end of range, however {} >= {}",
                range_indices.first,
                range_indices.second);
        }

        range->ranges.push_back(std::move(range_indices));

        /// Skip the comma separating consecutive range entries.
        if (pos->type != TokenType::ClosingSquareBracket)
        {
            ++pos;
        }
    }
    ++pos;

    /// We can't have both ranges and star present, so parse was successful <=> exactly 1 of these conditions is true
    return !range->ranges.empty() ^ range->is_star;
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser for the bracketed range accessor inside a JSONPath query.
class ParserJSONPathRange : public IParserBase
{
private:
    const char * getName() const override { return "ParserJSONPathRange"; }
    /// Parses "[n]", "[m TO n]" or comma-separated lists thereof into ASTJSONPathRange.
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
    explicit ParserJSONPathRange() = default;
};
}

View File

@ -0,0 +1,27 @@
#include <Functions/JSONPath/ASTs/ASTJSONPathRoot.h>
#include <Functions/JSONPath/Parsers/ParserJSONPathRoot.h>
#include <Parsers/Lexer.h>
namespace DB
{
/**
*
* @param pos token iterator
* @param node node of ASTJSONPathRoot
* @param expected stuff for logging
* @return was parse successful
*/
/// Parses the mandatory "$" that starts every JSONPath, producing an
/// ASTJSONPathRoot node; registers the expectation on failure for diagnostics.
bool ParserJSONPathRoot::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type != TokenType::DollarSign)
    {
        expected.add(pos, "dollar sign (start of jsonpath)");
        return false;
    }

    ++pos;
    node = std::make_shared<ASTJSONPathRoot>();
    return true;
}
}

Some files were not shown because too many files have changed in this diff Show More