Merge branch 'master' into no-keep-context-lock-while-calculating-access

This commit is contained in:
Nikita Mikhaylov 2023-07-20 14:42:13 +02:00 committed by GitHub
commit 668062dc29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
273 changed files with 6386 additions and 1618 deletions

View File

@ -2870,6 +2870,216 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH" sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan0:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=0
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan1:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=1
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan2:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=2
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan3:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=3
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan4:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=4
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsAnalyzerAsan5:
needs: [BuilderDebAsan]
runs-on: [self-hosted, stress-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/integration_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Integration tests (asan, analyzer)
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
RUN_BY_HASH_NUM=5
RUN_BY_HASH_TOTAL=6
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Integration test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 integration_test_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
IntegrationTestsTsan0: IntegrationTestsTsan0:
needs: [BuilderDebTsan] needs: [BuilderDebTsan]
runs-on: [self-hosted, stress-tester] runs-on: [self-hosted, stress-tester]
@ -3963,6 +4173,12 @@ jobs:
- IntegrationTestsAsan3 - IntegrationTestsAsan3
- IntegrationTestsAsan4 - IntegrationTestsAsan4
- IntegrationTestsAsan5 - IntegrationTestsAsan5
- IntegrationTestsAnalyzerAsan0
- IntegrationTestsAnalyzerAsan1
- IntegrationTestsAnalyzerAsan2
- IntegrationTestsAnalyzerAsan3
- IntegrationTestsAnalyzerAsan4
- IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0 - IntegrationTestsRelease0
- IntegrationTestsRelease1 - IntegrationTestsRelease1
- IntegrationTestsRelease2 - IntegrationTestsRelease2

View File

@ -5099,6 +5099,12 @@ jobs:
- IntegrationTestsAsan3 - IntegrationTestsAsan3
- IntegrationTestsAsan4 - IntegrationTestsAsan4
- IntegrationTestsAsan5 - IntegrationTestsAsan5
- IntegrationTestsAnalyzerAsan0
- IntegrationTestsAnalyzerAsan1
- IntegrationTestsAnalyzerAsan2
- IntegrationTestsAnalyzerAsan3
- IntegrationTestsAnalyzerAsan4
- IntegrationTestsAnalyzerAsan5
- IntegrationTestsRelease0 - IntegrationTestsRelease0
- IntegrationTestsRelease1 - IntegrationTestsRelease1
- IntegrationTestsRelease2 - IntegrationTestsRelease2

View File

@ -4,6 +4,8 @@ services:
kafka_zookeeper: kafka_zookeeper:
image: zookeeper:3.4.9 image: zookeeper:3.4.9
hostname: kafka_zookeeper hostname: kafka_zookeeper
ports:
- 2181:2181
environment: environment:
ZOO_MY_ID: 1 ZOO_MY_ID: 1
ZOO_PORT: 2181 ZOO_PORT: 2181
@ -15,15 +17,14 @@ services:
image: confluentinc/cp-kafka:5.2.0 image: confluentinc/cp-kafka:5.2.0
hostname: kafka1 hostname: kafka1
ports: ports:
- ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081} - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
environment: environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
KAFKA_ADVERTISED_HOST_NAME: kafka1 KAFKA_ADVERTISED_HOST_NAME: kafka1
KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_BROKER_ID: 1 KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on: depends_on:
@ -35,13 +36,38 @@ services:
image: confluentinc/cp-schema-registry:5.2.0 image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry hostname: schema-registry
ports: ports:
- ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
environment: environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry SCHEMA_REGISTRY_HOST_NAME: schema-registry
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
depends_on: depends_on:
- kafka_zookeeper - kafka_zookeeper
- kafka1 - kafka1
restart: always
security_opt:
- label:disable
schema-registry-auth:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry-auth
ports:
- ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
volumes:
- ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
depends_on:
- kafka_zookeeper
- kafka1
restart: always
security_opt: security_opt:
- label:disable - label:disable

View File

@ -76,6 +76,7 @@ The supported formats are:
| [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ | | [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ | | [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ |
@ -472,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
## CSVWithNames {#csvwithnames} ## CSVWithNames {#csvwithnames}
@ -1515,6 +1517,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
::: :::
## RowBinaryWithDefaults {#rowbinarywithdefaults}
Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if default value should be used.
Examples:
```sql
:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
┌──x─┬─y─┐
│ 42 │ 1 │
└────┴───┘
```
For column `x` there is only one byte `01` that indicates that default value should be used and no other data after this byte is provided.
For column `y` data starts with byte `00` that indicates that column has actual value that should be read from the subsequent data `01000000`.
## RowBinary format settings {#row-binary-format-settings} ## RowBinary format settings {#row-binary-format-settings}
- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. - [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.

View File

@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
``` ```
:::note ALL :::note ALL
`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse. Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command.
::: :::
## Background ## Background

View File

@ -989,6 +989,28 @@ Result
a b a b
``` ```
### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
Allow to set default value to column when CSV field deserialization failed on bad value
Default value: `false`.
**Examples**
Query
```bash
./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
./clickhouse local -q "select * from test_tbl"
```
Result
```text
a 0 1971-01-01
```
## Values format settings {#values-format-settings} ## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@ -1325,6 +1347,17 @@ Default value: 0.
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
Format:
``` text
http://[user:password@]machine[:port]"
```
Examples:
``` text
http://registry.example.com:8081
http://admin:secret@registry.example.com:8081
```
Default value: `Empty`. Default value: `Empty`.
### output_format_avro_codec {#output_format_avro_codec} ### output_format_avro_codec {#output_format_avro_codec}

View File

@ -4524,6 +4524,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta
### Placeholders ### Placeholders
- `%a` — Full original filename (e.g., "sample.csv").
- `%f` — Original filename without extension (e.g., "sample"). - `%f` — Original filename without extension (e.g., "sample").
- `%e` — Original file extension with dot (e.g., ".csv"). - `%e` — Original file extension with dot (e.g., ".csv").
- `%t` — Timestamp (in microseconds). - `%t` — Timestamp (in microseconds).

View File

@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
## age ## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
For an alternative to `age`, see function `date\_diff`. For an alternative to `age`, see function `date\_diff`.
@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values: Possible values:
- `microsecond` (possible abbreviations: `us`, `u`)
- `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`) - `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`) - `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`) - `hour` (possible abbreviations: `hh`, `h`)
@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
Possible values: Possible values:
- `microsecond` (possible abbreviations: `us`, `u`)
- `millisecond` (possible abbreviations: `ms`)
- `second` (possible abbreviations: `ss`, `s`) - `second` (possible abbreviations: `ss`, `s`)
- `minute` (possible abbreviations: `mi`, `n`) - `minute` (possible abbreviations: `mi`, `n`)
- `hour` (possible abbreviations: `hh`, `h`) - `hour` (possible abbreviations: `hh`, `h`)
@ -1134,6 +1138,8 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression. Returns the current date and time at the moment of query analysis. The function is a constant expression.
Alias: `current_timestamp`.
**Syntax** **Syntax**
``` sql ``` sql
@ -1264,6 +1270,8 @@ Result:
Accepts zero arguments and returns the current date at one of the moments of query analysis. Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()). The same as toDate(now()).
Aliases: `curdate`, `current_date`.
## yesterday ## yesterday
Accepts zero arguments and returns yesterdays date at one of the moments of query analysis. Accepts zero arguments and returns yesterdays date at one of the moments of query analysis.

View File

@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead. If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
## sipHash64 (#hash_functions-siphash64) ## sipHash64 {#hash_functions-siphash64}
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
1. The first and the second hash value are concatenated to an array which is hashed. 1. The first and the second hash value are concatenated to an array which is hashed.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. 2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input. 3. This calculation is repeated for all remaining hash values of the original input.
**Arguments** **Arguments**

View File

@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
## firstLine
Returns the first line from a multi-line string.
**Syntax**
```sql
firstLine(val)
```
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
**Returned value**
- The first line of the input value or the whole value if there is no line
separators. [String](../data-types/string.md)
**Example**
```sql
select firstLine('foo\nbar\nbaz');
```
Result:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
::: :::
```sql ```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
``` ```
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.

View File

@ -5,7 +5,27 @@ sidebar_label: WITH
# WITH Clause # WITH Clause
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case.
An example of such behavior is below
``` sql
with cte_numbers as
(
select
num
from generateRandom('num UInt64', NULL)
limit 1000000
)
select
count()
from cte_numbers
where num in (select num from cte_numbers)
```
If CTEs were to pass exactly the results and not just a piece of code, you would always see `1000000`
However, due to the fact that we are referring `cte_numbers` twice, random numbers are generated each time and, accordingly, we see different random results, `280501, 392454, 261636, 196227` and so on...
## Syntax ## Syntax

View File

@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and
- `*` — Substitutes any number of any characters except `/` including empty string. - `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character. - `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. - `{N..M}` — Substitutes any number in range from N to M including both borders.
- `**` - Fetches all files inside the folder recursively. - `**` - Fetches all files inside the folder recursively.

View File

@ -4201,6 +4201,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
### Шаблон ### Шаблон
Шаблон поддерживает следующие виды плейсхолдеров: Шаблон поддерживает следующие виды плейсхолдеров:
- `%a` — Полное исходное имя файла (например "sample.csv").
- `%f` — Исходное имя файла без расширения (например "sample"). - `%f` — Исходное имя файла без расширения (например "sample").
- `%e` — Оригинальное расширение файла с точкой (например ".csv"). - `%e` — Оригинальное расширение файла с точкой (например ".csv").
- `%t` — Текущее время (в микросекундах). - `%t` — Текущее время (в микросекундах).

View File

@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
## age ## age
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
**Синтаксис** **Синтаксис**
@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения: Возможные значения:
- `microsecond` (возможные сокращения: `us`, `u`)
- `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`) - `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`) - `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`) - `hour` (возможные сокращения: `hh`, `h`)
@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
Возможные значения: Возможные значения:
- `microsecond` (возможные сокращения: `us`, `u`)
- `millisecond` (возможные сокращения: `ms`)
- `second` (возможные сокращения: `ss`, `s`) - `second` (возможные сокращения: `ss`, `s`)
- `minute` (возможные сокращения: `mi`, `n`) - `minute` (возможные сокращения: `mi`, `n`)
- `hour` (возможные сокращения: `hh`, `h`) - `hour` (возможные сокращения: `hh`, `h`)

View File

@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00 &nbsp;
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
## firstLine
Возвращает первую строку в многострочном тексте.
**Синтаксис**
```sql
firstLine(val)
```
**Аргументы**
- `val` - текст для обработки. [String](../data-types/string.md)
**Returned value**
- Первая строка текста или весь текст, если переносы строк отсутствуют.
Тип: [String](../data-types/string.md)
**Пример**
Запрос:
```sql
select firstLine('foo\nbar\nbaz');
```
Результат:
```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo │
└────────────────────────────┘
```

View File

@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view).
::: :::
```sql ```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
``` ```
`LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md). `LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md).

View File

@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — заменяет ровно один любой символ. - `?` — заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. - `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md). Конструкция с `{}` аналогична табличной функции [remote](remote.md).

View File

@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
- `unit``value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 - `unit``value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
可能的值: 可能的值:
- `microsecond`
- `millisecond`
- `second` - `second`
- `minute` - `minute`
- `hour` - `hour`

View File

@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中
使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。 使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。
```sql ```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
``` ```
实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。 实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。

View File

@ -887,6 +887,7 @@ try
#endif #endif
global_context->setRemoteHostFilter(config()); global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str; fs::path path = path_str;
@ -1200,6 +1201,7 @@ try
} }
global_context->setRemoteHostFilter(*config); global_context->setRemoteHostFilter(*config);
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);

View File

@ -866,6 +866,14 @@
--> -->
<!--</remote_url_allow_hosts>--> <!--</remote_url_allow_hosts>-->
<!-- The list of HTTP headers forbidden to use in HTTP-related storage engines and table functions.
If this section is not present in configuration, all headers are allowed.
-->
<!-- <http_forbid_headers>
<header>exact_header</header>
<header_regexp>(?i)(case_insensitive_header)</header_regexp>
</http_forbid_headers> -->
<!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file. <!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element. By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file. Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.

View File

@ -88,3 +88,4 @@ endfunction()
add_rust_subdirectory (BLAKE3) add_rust_subdirectory (BLAKE3)
add_rust_subdirectory (skim) add_rust_subdirectory (skim)
add_rust_subdirectory (prql)

3
rust/prql/CMakeLists.txt Normal file
View File

@ -0,0 +1,3 @@
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
target_include_directories(_ch_rust_prql INTERFACE include)
add_library(ch_rust::prql ALIAS _ch_rust_prql)

569
rust/prql/Cargo.lock generated Normal file
View File

@ -0,0 +1,569 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"serde_json",
]
[[package]]
name = "addr2line"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
dependencies = [
"unicode-width",
"yansi",
]
[[package]]
name = "backtrace"
version = "0.3.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
dependencies = [
"hashbrown 0.12.3",
"stacker",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "enum-as-inner"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "equivalent"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.27.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "indexmap"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
dependencies = [
"equivalent",
"hashbrown 0.14.0",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "object"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "proc-macro2"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-compiler"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"csv",
"enum-as-inner",
"itertools",
"lazy_static",
"log",
"once_cell",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
"strum_macros",
]
[[package]]
name = "psm"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustversion"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
[[package]]
name = "ryu"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
[[package]]
name = "semver"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.166"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.23",
]
[[package]]
name = "serde_json"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "sqlformat"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
dependencies = [
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
dependencies = [
"log",
"serde",
]
[[package]]
name = "stacker"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"winapi",
]
[[package]]
name = "strum"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 1.0.109",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_categories"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"

20
rust/prql/Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "_ch_rust_prql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.8.1"
serde_json = "1.0"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
lto = true

18
rust/prql/include/prql.h Normal file
View File

@ -0,0 +1,18 @@
#pragma once
#include <cstdint>
extern "C" {
/// Converts a PRQL query to an SQL query.
/// @param query is a pointer to the beginning of the PRQL query.
/// @param size is the size of the PRQL query.
/// @param out is a pointer to a uint8_t pointer which will be set to the beginning of the null terminated SQL query or the error message.
/// @param out_size is the size of the string pointed by `out`.
/// @returns zero in case of success, non-zero in case of failure.
int64_t prql_to_sql(const uint8_t * query, uint64_t size, uint8_t ** out, uint64_t * out_size);
/// Frees the passed in pointer which's memory was allocated by Rust allocators previously.
void prql_free_pointer(uint8_t * ptr_to_free);
} // extern "C"

56
rust/prql/src/lib.rs Normal file
View File

@ -0,0 +1,56 @@
use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
let out_size_ptr = unsafe { &mut *out_size };
*out_size_ptr = (result.len() + 1).try_into().unwrap();
assert!(!out.is_null());
let out_ptr = unsafe { &mut *out };
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
#[no_mangle]
pub unsafe extern "C" fn prql_to_sql(
query: *const u8,
size: u64,
out: *mut *mut u8,
out_size: *mut u64,
) -> i64 {
let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec();
let maybe_prql_query = String::from_utf8(query_vec);
if maybe_prql_query.is_err() {
set_output(
String::from("The PRQL query must be UTF-8 encoded!"),
out,
out_size,
);
return 1;
}
let prql_query = maybe_prql_query.unwrap();
let opts = &Options {
format: true,
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
};
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
Ok(sql_str) => (false, sql_str),
Err(err) => (true, err.to_string()),
};
set_output(res, out, out_size);
match is_err {
true => 1,
false => 0,
}
}
#[no_mangle]
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));
}

204
rust/skim/Cargo.lock generated
View File

@ -42,17 +42,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi 0.1.19",
"libc",
"winapi",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.1.0" version = "1.1.0"
@ -104,31 +93,6 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "clap"
version = "3.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
dependencies = [
"atty",
"bitflags",
"clap_lex",
"indexmap",
"once_cell",
"strsim",
"termcolor",
"textwrap",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
"os_str_bytes",
]
[[package]] [[package]]
name = "codespan-reporting" name = "codespan-reporting"
version = "0.11.1" version = "0.11.1"
@ -214,9 +178,9 @@ dependencies = [
[[package]] [[package]]
name = "cxx" name = "cxx"
version = "1.0.97" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8" checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
dependencies = [ dependencies = [
"cc", "cc",
"cxxbridge-flags", "cxxbridge-flags",
@ -226,9 +190,9 @@ dependencies = [
[[package]] [[package]]
name = "cxx-build" name = "cxx-build"
version = "1.0.97" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb" checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
dependencies = [ dependencies = [
"cc", "cc",
"codespan-reporting", "codespan-reporting",
@ -236,24 +200,24 @@ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"scratch", "scratch",
"syn 2.0.23", "syn 2.0.26",
] ]
[[package]] [[package]]
name = "cxxbridge-flags" name = "cxxbridge-flags"
version = "1.0.97" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8" checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
[[package]] [[package]]
name = "cxxbridge-macro" name = "cxxbridge-macro"
version = "1.0.97" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d" checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.26",
] ]
[[package]] [[package]]
@ -359,19 +323,6 @@ version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -398,32 +349,11 @@ dependencies = [
"wasi 0.11.0+wasi-snapshot-preview1", "wasi 0.11.0+wasi-snapshot-preview1",
] ]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.19" version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
dependencies = [
"libc",
]
[[package]]
name = "hermit-abi"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]] [[package]]
name = "iana-time-zone" name = "iana-time-zone"
@ -454,16 +384,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]] [[package]]
name = "js-sys" name = "js-sys"
version = "0.3.64" version = "0.3.64"
@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]] [[package]]
name = "link-cplusplus" name = "link-cplusplus"
version = "1.0.8" version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
dependencies = [ dependencies = [
"cc", "cc",
] ]
@ -564,7 +484,7 @@ version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [ dependencies = [
"hermit-abi 0.3.1", "hermit-abi",
"libc", "libc",
] ]
@ -574,12 +494,6 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "os_str_bytes"
version = "6.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
[[package]] [[package]]
name = "pin-utils" name = "pin-utils"
version = "0.1.0" version = "0.1.0"
@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.63" version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.29" version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
] ]
@ -648,9 +562,21 @@ dependencies = [
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.8.4" version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"memchr", "memchr",
@ -659,39 +585,33 @@ dependencies = [
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.7.2" version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.12" version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.1.0" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "scratch" name = "scratch"
version = "1.0.5" version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.164" version = "1.0.171"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]] [[package]]
name = "skim" name = "skim"
@ -699,23 +619,19 @@ version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32" checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
dependencies = [ dependencies = [
"atty",
"beef", "beef",
"bitflags", "bitflags",
"chrono", "chrono",
"clap",
"crossbeam", "crossbeam",
"defer-drop", "defer-drop",
"derive_builder", "derive_builder",
"env_logger",
"fuzzy-matcher", "fuzzy-matcher",
"lazy_static", "lazy_static",
"log", "log",
"nix 0.25.1", "nix 0.25.1",
"rayon", "rayon",
"regex", "regex",
"shlex", "time 0.3.23",
"time 0.3.22",
"timer", "timer",
"tuikit", "tuikit",
"unicode-width", "unicode-width",
@ -741,9 +657,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.23" version = "2.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -770,30 +686,24 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "textwrap"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.40" version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.40" version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.26",
] ]
[[package]] [[package]]
@ -819,9 +729,9 @@ dependencies = [
[[package]] [[package]]
name = "time" name = "time"
version = "0.3.22" version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd" checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
dependencies = [ dependencies = [
"serde", "serde",
"time-core", "time-core",
@ -858,9 +768,9 @@ dependencies = [
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.9" version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
@ -928,7 +838,7 @@ dependencies = [
"once_cell", "once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.26",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.26",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]

View File

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
skim = "0.10.2" skim = { version = "0.10.2", default-features = false }
cxx = "1.0.83" cxx = "1.0.83"
term = "0.7.0" term = "0.7.0"

View File

@ -1,10 +1,25 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArrayMoving.h>
#include <AggregateFunctions/Helpers.h> #include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h> #include <AggregateFunctions/FactoryHelpers.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h> #include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
namespace DB namespace DB
@ -13,11 +28,186 @@ struct Settings;
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TOO_LARGE_ARRAY_SIZE;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
} }
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value; /// Prefix sums.
T sum{};
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
{
sum += val;
value.push_back(sum, arena);
}
};
template <typename T>
struct MovingSumData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingSum";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx];
else
return this->value[idx] - this->value[idx - window_size];
}
};
template <typename T>
struct MovingAvgData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingAvg";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx] / T(window_size);
else
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
}
};
template <typename T, typename LimitNumElements, typename Data>
class MovingImpl final
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
{
static constexpr bool limit_num_elems = LimitNumElements::value;
UInt64 window_size;
public:
using ResultT = typename Data::Accumulator;
using ColumnSource = ColumnVectorOrDecimal<T>;
/// Probably for overflow function in the future.
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
, window_size(window_size_) {}
String getName() const override { return Data::name; }
static DataTypePtr createResultType(const DataTypePtr & argument)
{
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
}
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(static_cast<ResultT>(value), arena);
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
size_t cur_size = cur_elems.value.size();
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
{
cur_elems.value[i] += cur_elems.sum;
}
cur_elems.sum += rhs_elems.sum;
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
if (size > 0)
{
auto & value = this->data(place).value;
value.resize(size, arena);
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
this->data(place).sum = value.back();
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & data = this->data(place);
size_t size = data.value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
{
if (!limit_num_elems)
{
data_to.push_back(data.get(i, size));
}
else
{
data_to.push_back(data.get(i, window_size));
}
}
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
private:
static auto getReturnTypeElement(const DataTypePtr & argument)
{
if constexpr (!is_decimal<ResultT>)
return std::make_shared<DataTypeNumber<ResultT>>();
else
{
using Res = DataTypeDecimal<ResultT>;
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
}
}
};
namespace namespace
{ {
@ -79,7 +269,7 @@ AggregateFunctionPtr createAggregateFunctionMoving(
if (type != Field::Types::Int64 && type != Field::Types::UInt64) if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) || if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0)) (type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);

View File

@ -1,207 +0,0 @@
#pragma once
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnArray.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <type_traits>
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_LARGE_ARRAY_SIZE;
}
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
Array value; /// Prefix sums.
T sum{};
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
{
sum += val;
value.push_back(sum, arena);
}
};
template <typename T>
struct MovingSumData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingSum";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx];
else
return this->value[idx] - this->value[idx - window_size];
}
};
template <typename T>
struct MovingAvgData : public MovingData<T>
{
static constexpr auto name = "groupArrayMovingAvg";
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{
if (idx < window_size)
return this->value[idx] / T(window_size);
else
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
}
};
template <typename T, typename LimitNumElements, typename Data>
class MovingImpl final
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
{
static constexpr bool limit_num_elems = LimitNumElements::value;
UInt64 window_size;
public:
using ResultT = typename Data::Accumulator;
using ColumnSource = ColumnVectorOrDecimal<T>;
/// Probably for overflow function in the future.
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
, window_size(window_size_) {}
String getName() const override { return Data::name; }
static DataTypePtr createResultType(const DataTypePtr & argument)
{
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
}
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(static_cast<ResultT>(value), arena);
}
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_elems = this->data(place);
auto & rhs_elems = this->data(rhs);
size_t cur_size = cur_elems.value.size();
if (rhs_elems.value.size())
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
{
cur_elems.value[i] += cur_elems.sum;
}
cur_elems.sum += rhs_elems.sum;
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
size_t size = 0;
readVarUInt(size, buf);
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
if (size > 0)
{
auto & value = this->data(place).value;
value.resize(size, arena);
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
this->data(place).sum = value.back();
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
const auto & data = this->data(place);
size_t size = data.value.size();
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
offsets_to.push_back(offsets_to.back() + size);
if (size)
{
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
{
if (!limit_num_elems)
{
data_to.push_back(data.get(i, size));
}
else
{
data_to.push_back(data.get(i, window_size));
}
}
}
}
bool allocatesMemoryInArena() const override
{
return true;
}
private:
static auto getReturnTypeElement(const DataTypePtr & argument)
{
if constexpr (!is_decimal<ResultT>)
return std::make_shared<DataTypeNumber<ResultT>>();
else
{
using Res = DataTypeDecimal<ResultT>;
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
}
}
};
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE
}

View File

@ -46,6 +46,7 @@
#include <Parsers/ASTColumnDeclaration.h> #include <Parsers/ASTColumnDeclaration.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/Kusto/ParserKQLStatement.h> #include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Processors/Formats/Impl/NullFormat.h> #include <Processors/Formats/Impl/NullFormat.h>
#include <Processors/Formats/IInputFormat.h> #include <Processors/Formats/IInputFormat.h>
@ -72,6 +73,7 @@
#include <iostream> #include <iostream>
#include <filesystem> #include <filesystem>
#include <map> #include <map>
#include <memory>
#include <unordered_map> #include <unordered_map>
#include "config_version.h" #include "config_version.h"
@ -338,6 +340,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
if (dialect == Dialect::kusto) if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert); parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth);
else else
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert); parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);

View File

@ -105,6 +105,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
for (auto it = addresses.begin(); it != addresses.end();) for (auto it = addresses.begin(); it != addresses.end();)
{ {
have_more_addresses_to_connect = it != std::prev(addresses.end());
if (connected) if (connected)
disconnect(); disconnect();

View File

@ -159,6 +159,8 @@ public:
out->setAsyncCallback(async_callback); out->setAsyncCallback(async_callback);
} }
bool haveMoreAddressesToConnect() const { return have_more_addresses_to_connect; }
private: private:
String host; String host;
UInt16 port; UInt16 port;
@ -227,6 +229,8 @@ private:
std::shared_ptr<WriteBuffer> maybe_compressed_out; std::shared_ptr<WriteBuffer> maybe_compressed_out;
std::unique_ptr<NativeWriter> block_out; std::unique_ptr<NativeWriter> block_out;
bool have_more_addresses_to_connect = false;
/// Logger is created lazily, for avoid to run DNS request in constructor. /// Logger is created lazily, for avoid to run DNS request in constructor.
class LoggerWrapper class LoggerWrapper
{ {

View File

@ -179,7 +179,7 @@ bool ConnectionEstablisherAsync::checkTimeout()
is_timeout_alarmed = true; is_timeout_alarmed = true;
} }
if (is_timeout_alarmed && !is_socket_ready) if (is_timeout_alarmed && !is_socket_ready && !haveMoreAddressesToConnect())
{ {
/// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run, /// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run,
/// but in async case we process timeout outside and cannot throw exception. So, we just save fail message. /// but in async case we process timeout outside and cannot throw exception. So, we just save fail message.
@ -225,6 +225,11 @@ void ConnectionEstablisherAsync::resetResult()
} }
} }
bool ConnectionEstablisherAsync::haveMoreAddressesToConnect()
{
return !result.entry.isNull() && result.entry->haveMoreAddressesToConnect();
}
#endif #endif
} }

View File

@ -104,6 +104,8 @@ private:
void resetResult(); void resetResult();
bool haveMoreAddressesToConnect();
ConnectionEstablisher connection_establisher; ConnectionEstablisher connection_establisher;
TryResult result; TryResult result;
std::string fail_message; std::string fail_message;

View File

@ -319,24 +319,21 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica"); throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");
Packet packet; Packet packet;
try
{ {
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback)); AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
packet = current_connection->receivePacket();
try }
catch (Exception & e)
{
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
{ {
packet = current_connection->receivePacket(); /// Exception may happen when packet is received, e.g. when got unknown packet.
} /// In this case, invalidate replica, so that we would not read from it anymore.
catch (Exception & e) current_connection->disconnect();
{ invalidateReplica(state);
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
{
/// Exception may happen when packet is received, e.g. when got unknown packet.
/// In this case, invalidate replica, so that we would not read from it anymore.
current_connection->disconnect();
invalidateReplica(state);
}
throw;
} }
throw;
} }
switch (packet.type) switch (packet.type)

View File

@ -848,6 +848,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
void QueryFuzzer::notifyQueryFailed(ASTPtr ast) void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
{ {
if (ast == nullptr)
return;
auto remove_fuzzed_table = [this](const auto & table_name) auto remove_fuzzed_table = [this](const auto & table_name)
{ {
auto pos = table_name.find("__fuzz_"); auto pos = table_name.find("__fuzz_");

View File

@ -1,26 +1,4 @@
#include "Allocator.h" #include "Allocator.h"
/** Keep definition of this constant in cpp file; otherwise its value template class Allocator<false>;
* is inlined into allocator code making it impossible to override it template class Allocator<true>;
* in third-party code.
*
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
*/
#ifdef NDEBUG
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
#else
/**
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*
* Not too small to avoid too quick exhaust of memory mappings.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif
template class Allocator<false, false>;
template class Allocator<true, false>;
template class Allocator<false, true>;
template class Allocator<true, true>;

View File

@ -36,51 +36,26 @@
#include <Common/Allocator_fwd.h> #include <Common/Allocator_fwd.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/**
* Many modern allocators (for example, tcmalloc) do not do a mremap for
* realloc, even in case of large enough chunks of memory. Although this allows
* you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address
* space is very slow, especially in the case of a large number of threads. We
* expect that the set of operations mmap/something to do/mremap can only be
* performed about 1000 times per second.
*
* P.S. This is also required, because tcmalloc can not allocate a chunk of
* memory greater than 16 GB.
*
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
* to override it during linkage when using ClickHouse as a library in
* third-party applications which may already use own allocator doing mmaps
* in the implementation of alloc/realloc.
*/
extern const size_t MMAP_THRESHOLD;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace CurrentMetrics
{
extern const Metric MMappedAllocs;
extern const Metric MMappedAllocBytes;
}
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
} }
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
* Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries.
* That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
*/
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena. /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables. * Also used in hash tables.
* The interface is different from std::allocator * The interface is different from std::allocator
@ -88,10 +63,8 @@ namespace ErrorCodes
* - passing the size into the `free` method; * - passing the size into the `free` method;
* - by the presence of the `alignment` argument; * - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables); * - the possibility of zeroing memory (used in hash tables);
* - random hint address for mmap
* - mmap_threshold for using mmap less or more
*/ */
template <bool clear_memory_, bool mmap_populate> template <bool clear_memory_>
class Allocator class Allocator
{ {
public: public:
@ -109,7 +82,7 @@ public:
try try
{ {
checkSize(size); checkSize(size);
freeNoTrack(buf, size); freeNoTrack(buf);
CurrentMemoryTracker::free(size); CurrentMemoryTracker::free(size);
} }
catch (...) catch (...)
@ -132,49 +105,26 @@ public:
/// nothing to do. /// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc. /// BTW, it's not possible to change alignment while doing realloc.
} }
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD else if (alignment <= MALLOC_MIN_ALIGNMENT)
&& alignment <= MALLOC_MIN_ALIGNMENT)
{ {
/// Resize malloc'd memory region with no special alignment requirement. /// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size); CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size); void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf) if (nullptr == new_buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); {
DB::throwFromErrno(
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
buf = new_buf; buf = new_buf;
if constexpr (clear_memory) if constexpr (clear_memory)
if (new_size > old_size) if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size); memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
} }
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < MMAP_THRESHOLD)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else else
{ {
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment); void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size)); memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size); free(buf, old_size);
@ -192,83 +142,38 @@ protected:
static constexpr bool clear_memory = clear_memory_; static constexpr bool clear_memory = clear_memory_;
// Freshly mmapped pages are copy-on-write references to a global zero page.
// On the first write, a page fault occurs, and an actual writable page is
// allocated. If we are going to use this memory soon, such as when resizing
// hash tables, it makes sense to pre-fault the pages by passing
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private: private:
void * allocNoTrack(size_t size, size_t alignment) void * allocNoTrack(size_t size, size_t alignment)
{ {
void * buf; void * buf;
size_t mmap_min_alignment = ::getPageSize(); if (alignment <= MALLOC_MIN_ALIGNMENT)
if (size >= MMAP_THRESHOLD)
{ {
if (alignment > mmap_min_alignment) if constexpr (clear_memory)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, buf = ::calloc(size, 1);
"Too large alignment {}: more than page size when allocating {}.", else
ReadableSize(alignment), ReadableSize(size)); buf = ::malloc(size);
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE, if (nullptr == buf)
mmap_flags, -1, 0); DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
if (MAP_FAILED == buf)
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
} }
else else
{ {
if (alignment <= MALLOC_MIN_ALIGNMENT) buf = nullptr;
{ int res = posix_memalign(&buf, alignment, size);
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (nullptr == buf) if (0 != res)
DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)),
} DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res) if constexpr (clear_memory)
DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), memset(buf, 0, size);
DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if constexpr (clear_memory)
memset(buf, 0, size);
}
} }
return buf; return buf;
} }
void freeNoTrack(void * buf, size_t size) void freeNoTrack(void * buf)
{ {
if (size >= MMAP_THRESHOLD) ::free(buf);
{
if (0 != munmap(buf, size))
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
}
else
{
::free(buf);
}
} }
void checkSize(size_t size) void checkSize(size_t size)
@ -277,21 +182,6 @@ private:
if (size >= 0x8000000000000000ULL) if (size >= 0x8000000000000000ULL)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size); throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
} }
#ifndef NDEBUG
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
/// default. This may lead to worse TLB performance.
void * getMmapHint()
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
}
#else
void * getMmapHint()
{
return nullptr;
}
#endif
}; };
@ -367,7 +257,5 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
/// Prevent implicit template instantiation of Allocator /// Prevent implicit template instantiation of Allocator
extern template class Allocator<false, false>; extern template class Allocator<false>;
extern template class Allocator<true, false>; extern template class Allocator<true>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;

View File

@ -3,7 +3,7 @@
* This file provides forward declarations for Allocator. * This file provides forward declarations for Allocator.
*/ */
template <bool clear_memory_, bool mmap_populate = false> template <bool clear_memory_>
class Allocator; class Allocator;
template <typename Base, size_t N = 64, size_t Alignment = 1> template <typename Base, size_t N = 64, size_t Alignment = 1>

View File

@ -5,7 +5,6 @@ namespace DB
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_)) AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
{ {
createFiber();
} }
void AsyncTaskExecutor::resume() void AsyncTaskExecutor::resume()
@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
if (routine_is_finished) if (routine_is_finished)
return; return;
/// Create fiber lazily on first resume() call.
if (!fiber)
createFiber();
if (!checkBeforeTaskResume()) if (!checkBeforeTaskResume())
return; return;
@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
return; return;
resumeUnlocked(); resumeUnlocked();
/// Destroy fiber when it's finished.
if (routine_is_finished)
destroyFiber();
if (exception) if (exception)
processException(exception); processException(exception);
} }
@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
void AsyncTaskExecutor::restart() void AsyncTaskExecutor::restart()
{ {
std::lock_guard guard(fiber_lock); std::lock_guard guard(fiber_lock);
if (fiber) if (!routine_is_finished)
destroyFiber(); destroyFiber();
createFiber();
routine_is_finished = false; routine_is_finished = false;
} }

View File

@ -173,8 +173,6 @@
M(PartsInMemory, "In-memory parts.") \ M(PartsInMemory, "In-memory parts.") \
M(MMappedFiles, "Total number of mmapped files.") \ M(MMappedFiles, "Total number of mmapped files.") \
M(MMappedFileBytes, "Sum size of mmapped file regions.") \ M(MMappedFileBytes, "Sum size of mmapped file regions.") \
M(MMappedAllocs, "Total number of mmapped allocations") \
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \ M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \ M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \ M(KafkaConsumers, "Number of active Kafka consumers") \

View File

@ -10,7 +10,6 @@
#include <cassert> #include <cassert>
#include <chrono> #include <chrono>
#include <cstring> #include <cstring>
#include <iostream>
#include <memory> #include <memory>

View File

@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
// Define placeholders and their corresponding values // Define placeholders and their corresponding values
std::map<String, String> placeholders = std::map<String, String> placeholders =
{ {
{"%a", filename},
{"%f", file_base}, {"%f", file_base},
{"%e", file_ext}, {"%e", file_ext},
{"%t", timestamp}, {"%t", timestamp},
@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error) bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
{ {
// Check if the rule contains invalid placeholders // Check if the rule contains invalid placeholders
re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$"); re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern)) if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
{ {
if (throw_on_error) if (throw_on_error)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
return false; return false;
} }
// Replace valid placeholders with empty strings and count remaining percentage signs. // Replace valid placeholders with empty strings and count remaining percentage signs.
String replaced_rule = rule; String replaced_rule = rule;
boost::replace_all(replaced_rule, "%a", "");
boost::replace_all(replaced_rule, "%f", ""); boost::replace_all(replaced_rule, "%f", "");
boost::replace_all(replaced_rule, "%e", ""); boost::replace_all(replaced_rule, "%e", "");
boost::replace_all(replaced_rule, "%t", ""); boost::replace_all(replaced_rule, "%t", "");

View File

@ -9,6 +9,7 @@ namespace DB
/** /**
* The FileRenamer class provides functionality for renaming files based on given pattern with placeholders * The FileRenamer class provides functionality for renaming files based on given pattern with placeholders
* The supported placeholders are: * The supported placeholders are:
* %a - Full original file name ("sample.csv")
* %f - Original filename without extension ("sample") * %f - Original filename without extension ("sample")
* %e - Original file extension with dot (".csv") * %e - Original file extension with dot (".csv")
* %t - Timestamp (in microseconds) * %t - Timestamp (in microseconds)

View File

@ -0,0 +1,56 @@
#include <Common/HTTPHeaderFilter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <re2/re2.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
void HTTPHeaderFilter::checkHeaders(const HTTPHeaderEntries & entries) const
{
std::lock_guard guard(mutex);
for (const auto & entry : entries)
{
if (forbidden_headers.contains(entry.name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
"see <http_forbid_headers>", entry.name);
for (const auto & header_regex : forbidden_headers_regexp)
if (re2::RE2::FullMatch(entry.name, header_regex))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
"see <http_forbid_headers>", entry.name);
}
}
void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::lock_guard guard(mutex);
if (config.has("http_forbid_headers"))
{
std::vector<std::string> keys;
config.keys("http_forbid_headers", keys);
for (const auto & key : keys)
{
if (startsWith(key, "header_regexp"))
forbidden_headers_regexp.push_back(config.getString("http_forbid_headers." + key));
else if (startsWith(key, "header"))
forbidden_headers.insert(config.getString("http_forbid_headers." + key));
}
}
else
{
forbidden_headers.clear();
forbidden_headers_regexp.clear();
}
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <IO/HTTPHeaderEntries.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <vector>
#include <unordered_set>
#include <mutex>
namespace DB
{
class HTTPHeaderFilter
{
public:
void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);
void checkHeaders(const HTTPHeaderEntries & entries) const;
private:
std::unordered_set<std::string> forbidden_headers;
std::vector<std::string> forbidden_headers_regexp;
mutable std::mutex mutex;
};
}

View File

@ -8,7 +8,7 @@
* table, so it makes sense to pre-fault the pages so that page faults don't * table, so it makes sense to pre-fault the pages so that page faults don't
* interrupt the resize loop. Set the allocator parameter accordingly. * interrupt the resize loop. Set the allocator parameter accordingly.
*/ */
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>; using HashTableAllocator = Allocator<true /* clear_memory */>;
template <size_t initial_bytes = 64> template <size_t initial_bytes = 64>
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>; using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;

View File

@ -3,8 +3,10 @@
#include "KeeperException.h" #include "KeeperException.h"
#include "TestKeeper.h" #include "TestKeeper.h"
#include <functional>
#include <filesystem> #include <filesystem>
#include <functional>
#include <ranges>
#include <vector>
#include <Common/ZooKeeper/Types.h> #include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h> #include <Common/ZooKeeper/ZooKeeperCommon.h>
@ -350,15 +352,35 @@ void ZooKeeper::createIfNotExists(const std::string & path, const std::string &
void ZooKeeper::createAncestors(const std::string & path) void ZooKeeper::createAncestors(const std::string & path)
{ {
size_t pos = 1; std::string data;
std::string path_created; // Ignored
std::vector<std::string> pending_nodes;
size_t last_pos = path.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
return;
std::string current_node = path.substr(0, last_pos);
while (true) while (true)
{ {
pos = path.find('/', pos); Coordination::Error code = createImpl(current_node, data, CreateMode::Persistent, path_created);
if (pos == std::string::npos) if (code == Coordination::Error::ZNONODE)
{
/// The parent node doesn't exist. Save the current node and try with the parent
last_pos = current_node.rfind('/');
if (last_pos == std::string::npos || last_pos == 0)
throw KeeperException(code, path);
pending_nodes.emplace_back(std::move(current_node));
current_node = path.substr(0, last_pos);
}
else if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
break; break;
createIfNotExists(path.substr(0, pos), ""); else
++pos; throw KeeperException(code, path);
} }
for (const std::string & pending : pending_nodes | std::views::reverse)
createIfNotExists(pending, data);
} }
void ZooKeeper::checkExistsAndGetCreateAncestorsOps(const std::string & path, Coordination::Requests & requests) void ZooKeeper::checkExistsAndGetCreateAncestorsOps(const std::string & path, Coordination::Requests & requests)

View File

@ -54,6 +54,7 @@
#cmakedefine01 USE_BORINGSSL #cmakedefine01 USE_BORINGSSL
#cmakedefine01 USE_BLAKE3 #cmakedefine01 USE_BLAKE3
#cmakedefine01 USE_SKIM #cmakedefine01 USE_SKIM
#cmakedefine01 USE_PRQL
#cmakedefine01 USE_OPENSSL_INTREE #cmakedefine01 USE_OPENSSL_INTREE
#cmakedefine01 USE_ULID #cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE #cmakedefine01 FIU_ENABLE

View File

@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
initializeDisks(config); initializeDisks(config);
} }
namespace
{
bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix)
{
const auto disk_type = config.getString(disk_config_prefix + ".type", "local");
using namespace std::literals;
static constexpr std::array supported_disk_types
{
"s3"sv,
"s3_plain"sv,
"local"sv
};
if (std::all_of(
supported_disk_types.begin(),
supported_disk_types.end(),
[&](const auto supported_type) { return disk_type != supported_type; }))
{
LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type);
return false;
}
return true;
}
}
void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config) void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config)
{ {
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance()); disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);
log_storage = getLogsPathFromConfig(config); log_storage = getLogsPathFromConfig(config);

View File

@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)
/** Components of DecimalX value: /** Components of DecimalX value:
* whole - represents whole part of decimal, can be negative or positive. * whole - represents whole part of decimal, can be negative or positive.
* fractional - for fractional part of decimal, always positive. * fractional - for fractional part of decimal.
*
* 0.123 represents 0 / 0.123
* -0.123 represents 0 / -0.123
* -1.123 represents -1 / 0.123
*/ */
template <typename DecimalType> template <typename DecimalType>
struct DecimalComponents struct DecimalComponents

View File

@ -577,6 +577,7 @@ class IColumn;
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \ M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \ M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
@ -736,7 +737,7 @@ class IColumn;
M(String, workload, "default", "Name of workload to be used to access resources", 0) \ M(String, workload, "default", "Name of workload to be used to access resources", 0) \
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \ M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
\ \
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
\ \
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
@ -774,6 +775,7 @@ class IColumn;
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
// End of COMMON_SETTINGS // End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
@ -872,6 +874,7 @@ class IColumn;
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \ M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \ M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \ M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \

View File

@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history = static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{ {
{"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},

View File

@ -138,7 +138,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse}, {{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}}) {"kusto", Dialect::kusto},
{"kusto", Dialect::kusto},
{"prql", Dialect::prql}})
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely? // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS,

View File

@ -207,6 +207,7 @@ enum class Dialect
clickhouse, clickhouse,
kusto, kusto,
kusto_auto, kusto_auto,
prql,
}; };
DECLARE_SETTING_ENUM(Dialect) DECLARE_SETTING_ENUM(Dialect)

View File

@ -3,6 +3,7 @@
#if USE_MYSQL #if USE_MYSQL
#include <Databases/MySQL/MaterializedMySQLSyncThread.h> #include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <cstdlib> #include <cstdlib>
#include <random> #include <random>
#include <string_view> #include <string_view>
@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
} }
} }
static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
{
String table_name;
String database_name;
if (ddl.empty()) return std::make_tuple(database_name, table_name);
bool parse_failed = false;
Tokens tokens(ddl.data(), ddl.data() + ddl.size());
IParser::Pos pos(tokens, 0);
Expected expected;
ASTPtr res;
ASTPtr table;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
parse_failed = true;
}
else
{
parse_failed = true;
}
if (!parse_failed)
{
if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
{
database_name = table_id.database_name;
table_name = table_id.table_name;
}
}
return std::make_tuple(database_name, table_name);
}
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
ContextPtr context_, ContextPtr context_,
const String & database_name_, const String & database_name_,
@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
String query = query_event.query; String query = query_event.query;
if (!materialized_tables_list.empty()) if (!materialized_tables_list.empty())
{ {
auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query); auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
if (!table_id.table_name.empty())
if (!ddl_table_name.empty())
{ {
ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name; if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
{ {
LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query); LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
return; return;
} }
} }

View File

@ -0,0 +1,185 @@
#include "config.h"
#include <gtest/gtest.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
using namespace DB;
struct ParseTableIDFromDDLTestCase
{
String query;
String database_name;
String table_name;
ParseTableIDFromDDLTestCase(
const String & query_,
const String & database_name_,
const String & table_name_)
: query(query_)
, database_name(database_name_)
, table_name(table_name_)
{
}
};
std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
{
return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
}
class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
{
};
TEST_P(ParseTableIDFromDDLTest, parse)
{
const auto & [query, expected_database_name, expected_table_name] = GetParam();
auto table_id = tryParseTableIDFromDDL(query, "default");
EXPECT_EQ(expected_database_name, table_id.database_name);
EXPECT_EQ(expected_table_name, table_id.table_name);
}
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
{
"SELECT * FROM db.table",
"",
""
},
{
"CREATE TEMPORARY TABLE db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TEMPORARY TABLE table",
"default",
"table"
},
{
"CREATE TEMPORARY TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"CREATE TABLE db.table",
"db",
"table"
},
{
"CREATE TABLE IF NOT EXISTS db.table",
"db",
"table"
},
{
"CREATE TABLE table",
"default",
"table"
},
{
"CREATE TABLE IF NOT EXISTS table",
"default",
"table"
},
{
"ALTER TABLE db.table",
"db",
"table"
},
{
"ALTER TABLE table",
"default",
"table"
},
{
"DROP TABLE db.table",
"db",
"table"
},
{
"DROP TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TABLE table",
"default",
"table"
},
{
"DROP TABLE IF EXISTS table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS db.table",
"db",
"table"
},
{
"DROP TEMPORARY TABLE table",
"default",
"table"
},
{
"DROP TEMPORARY TABLE IF EXISTS table",
"default",
"table"
},
{
"TRUNCATE db.table",
"db",
"table"
},
{
"TRUNCATE TABLE db.table",
"db",
"table"
},
{
"TRUNCATE table1",
"default",
"table1"
},
{
"TRUNCATE TABLE table",
"default",
"table"
},
{
"RENAME TABLE db.table",
"db",
"table"
},
{
"RENAME TABLE table",
"default",
"table"
},
{
"DROP DATABASE db",
"",
""
},
{
"DROP DATA`BASE db",
"",
""
},
{
"NOT A SQL",
"",
""
},
}));

View File

@ -0,0 +1,44 @@
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
{
bool is_ddl = false;
Tokens tokens(query.data(), query.data() + query.size());
IParser::Pos pos(tokens, 0);
Expected expected;
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
{
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected) || ParserKeyword("RENAME TABLE").ignore(pos, expected))
{
is_ddl = true;
}
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
{
ParserKeyword("IF EXISTS").ignore(pos, expected);
is_ddl = true;
}
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
{
ParserKeyword("TABLE").ignore(pos, expected);
is_ddl = true;
}
ASTPtr table;
if (!is_ddl || !ParserCompoundIdentifier(true).parse(pos, table, expected))
return StorageID::createEmpty();
auto table_id = table->as<ASTTableIdentifier>()->getTableId();
if (table_id.database_name.empty())
table_id.database_name = default_database_name;
return table_id;
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <base/types.h>
#include <Storages/IStorage.h>
namespace DB
{
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);
}

View File

@ -257,7 +257,6 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
const auto & headers_prefix = settings_config_prefix + ".headers"; const auto & headers_prefix = settings_config_prefix + ".headers";
if (config.has(headers_prefix)) if (config.has(headers_prefix))
{ {
Poco::Util::AbstractConfiguration::Keys config_keys; Poco::Util::AbstractConfiguration::Keys config_keys;
@ -297,7 +296,10 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
if (created_from_ddl) if (created_from_ddl)
{
context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url)); context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
}
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context); return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
}; };

View File

@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const
} }
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator)
{ {
Poco::Util::AbstractConfiguration::Keys keys; Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys); config.keys(config_prefix, keys);
@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
auto disk_config_prefix = config_prefix + "." + disk_name; auto disk_config_prefix = config_prefix + "." + disk_name;
if (disk_validator && !disk_validator(config, disk_config_prefix))
continue;
disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
} }
if (!has_default_disk) if (!has_default_disk)

View File

@ -23,7 +23,8 @@ public:
DiskSelector() = default; DiskSelector() = default;
DiskSelector(const DiskSelector & from) = default; DiskSelector(const DiskSelector & from) = default;
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); using DiskValidator = std::function<bool(const Poco::Util::AbstractConfiguration & config, const String & disk_config_prefix)>;
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {});
DiskSelectorPtr updateFromConfig( DiskSelectorPtr updateFromConfig(
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,

View File

@ -23,10 +23,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_)
: TemporaryFileOnDisk(disk_, "")
{}
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope) TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope)
: TemporaryFileOnDisk(disk_) : TemporaryFileOnDisk(disk_)
{ {

View File

@ -16,9 +16,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
class TemporaryFileOnDisk class TemporaryFileOnDisk
{ {
public: public:
explicit TemporaryFileOnDisk(const DiskPtr & disk_);
explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope); explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope);
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix); explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix = "tmp");
~TemporaryFileOnDisk(); ~TemporaryFileOnDisk();

View File

@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;

View File

@ -152,6 +152,7 @@ struct FormatSettings
bool trim_whitespaces = true; bool trim_whitespaces = true;
bool allow_whitespace_or_tab_as_delimiter = false; bool allow_whitespace_or_tab_as_delimiter = false;
bool allow_variable_number_of_columns = false; bool allow_variable_number_of_columns = false;
bool use_default_on_bad_values = false;
} csv; } csv;
struct HiveText struct HiveText

View File

@ -19,6 +19,9 @@
namespace DB namespace DB
{ {
static constexpr auto microsecond_multiplier = 1000000;
static constexpr auto millisecond_multiplier = 1000;
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -1377,6 +1380,36 @@ struct ToRelativeSecondNumImpl
using FactorTransform = ZeroTransform; using FactorTransform = ZeroTransform;
}; };
template <Int64 scale_multiplier>
struct ToRelativeSubsecondNumImpl
{
static constexpr auto name = "toRelativeSubsecondNumImpl";
static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
{
static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
if (scale == scale_multiplier)
return t.value;
if (scale > scale_multiplier)
return t.value / (scale / scale_multiplier);
return t.value * (scale_multiplier / scale);
}
static inline Int64 execute(UInt32 t, const DateLUTImpl &)
{
return t * scale_multiplier;
}
static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
{
return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
}
static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
}
using FactorTransform = ZeroTransform;
};
struct ToYYYYMMImpl struct ToYYYYMMImpl
{ {
static constexpr auto name = "toYYYYMM"; static constexpr auto name = "toYYYYMM";
@ -1476,25 +1509,47 @@ struct ToYYYYMMDDhhmmssImpl
using FactorTransform = ZeroTransform; using FactorTransform = ZeroTransform;
}; };
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
{
UInt16 millisecond;
UInt16 microsecond;
};
struct ToDateTimeComponentsImpl struct ToDateTimeComponentsImpl
{ {
static constexpr auto name = "toDateTimeComponents"; static constexpr auto name = "toDateTimeComponents";
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone) static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
{ {
return time_zone.toDateTimeComponents(t); auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
if (t.value < 0 && components.fractional)
{
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
--components.whole;
}
Int64 fractional = components.fractional;
if (scale_multiplier > microsecond_multiplier)
fractional = fractional / (scale_multiplier / microsecond_multiplier);
else if (scale_multiplier < microsecond_multiplier)
fractional = fractional * (microsecond_multiplier / scale_multiplier);
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
} }
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone) static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
{ {
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
} }
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone) static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
{ {
return time_zone.toDateTimeComponents(ExtendedDayNum(d)); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
} }
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone) static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
{ {
return time_zone.toDateTimeComponents(DayNum(d)); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
} }
using FactorTransform = ZeroTransform; using FactorTransform = ZeroTransform;

View File

@ -1112,6 +1112,11 @@ private:
bool c0_const = isColumnConst(*c0); bool c0_const = isColumnConst(*c0);
bool c1_const = isColumnConst(*c1); bool c1_const = isColumnConst(*c1);
/// This is a paranoid check to protect from a broken query analysis.
if (c0->isNullable() != c1->isNullable())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Logical error: columns are assumed to be of identical types, but they are different in Nullable");
if (c0_const && c1_const) if (c0_const && c1_const)
{ {
UInt8 res = 0; UInt8 res = 0;

View File

@ -79,28 +79,51 @@ namespace impl
UInt64 key1 = 0; UInt64 key1 = 0;
}; };
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) struct SipHashKeyColumns
{ {
SipHashKey ret{}; ColumnPtr key0;
ColumnPtr key1;
bool is_const;
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get()); size_t size() const
{
assert(key0 && key1);
assert(key0->size() == key1->size());
return key0->size();
}
SipHashKey getKey(size_t i) const
{
if (is_const)
i = 0;
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
return {key0data[i], key1data[i]};
}
};
static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
{
const ColumnTuple * tuple = nullptr;
const auto * column = key.column.get();
bool is_const = false;
if (isColumnConst(*column))
{
is_const = true;
tuple = checkAndGetColumnConstData<ColumnTuple>(column);
}
else
tuple = checkAndGetColumn<ColumnTuple>(column);
if (!tuple) if (!tuple)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
if (tuple->tupleSize() != 2) if (tuple->tupleSize() != 2)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
if (tuple->empty()) SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
return ret; assert(ret.key0);
if (!checkColumn<ColumnUInt64>(*ret.key0))
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
ret.key0 = key0col->get64(0);
else
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
assert(ret.key1);
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1)))) if (!checkColumn<ColumnUInt64>(*ret.key1))
ret.key1 = key1col->get64(0);
else
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
return ret; return ret;
@ -329,8 +352,10 @@ struct SipHash64KeyedImpl
static constexpr auto name = "sipHash64Keyed"; static constexpr auto name = "sipHash64Keyed";
using ReturnType = UInt64; using ReturnType = UInt64;
using Key = impl::SipHashKey; using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
@ -371,8 +396,10 @@ struct SipHash128KeyedImpl
static constexpr auto name = "sipHash128Keyed"; static constexpr auto name = "sipHash128Keyed";
using ReturnType = UInt128; using ReturnType = UInt128;
using Key = impl::SipHashKey; using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); } static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
@ -398,13 +425,43 @@ struct SipHash128ReferenceImpl
using ReturnType = UInt128; using ReturnType = UInt128;
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); } static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); } static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
static constexpr bool use_int_hash_for_pods = false; static constexpr bool use_int_hash_for_pods = false;
}; };
struct SipHash128ReferenceKeyedImpl
{
static constexpr auto name = "sipHash128ReferenceKeyed";
using ReturnType = UInt128;
using Key = impl::SipHashKey;
using KeyColumns = impl::SipHashKeyColumns;
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
{
return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
}
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
UInt128 tmp;
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
h1 = tmp;
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
h2 = tmp;
#endif
UInt128 hashes[] = {h1, h2};
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
}
static constexpr bool use_int_hash_for_pods = false;
};
/** Why we need MurmurHash2? /** Why we need MurmurHash2?
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
@ -1023,7 +1080,7 @@ private:
DECLARE_MULTITARGET_CODE( DECLARE_MULTITARGET_CODE(
template <typename Impl, bool Keyed, typename KeyType> template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
class FunctionAnyHash : public IFunction class FunctionAnyHash : public IFunction
{ {
public: public:
@ -1033,9 +1090,12 @@ private:
using ToType = typename Impl::ReturnType; using ToType = typename Impl::ReturnType;
template <typename FromType, bool first> template <typename FromType, bool first>
void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{ {
using ColVecType = ColumnVectorOrDecimal<FromType>; using ColVecType = ColumnVectorOrDecimal<FromType>;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column)) if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
{ {
@ -1044,6 +1104,9 @@ private:
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
ToType hash; ToType hash;
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
if constexpr (Impl::use_int_hash_for_pods) if constexpr (Impl::use_int_hash_for_pods)
{ {
@ -1077,6 +1140,14 @@ private:
} }
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column)) else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{ {
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
}
}
auto value = col_from_const->template getValue<FromType>(); auto value = col_from_const->template getValue<FromType>();
ToType hash; ToType hash;
@ -1107,8 +1178,15 @@ private:
if constexpr (first) if constexpr (first)
vec_to.assign(size, hash); vec_to.assign(size, hash);
else else
{
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash); vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
} }
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1116,9 +1194,12 @@ private:
} }
template <typename FromType, bool first> template <typename FromType, bool first>
void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{ {
using ColVecType = ColumnVectorOrDecimal<FromType>; using ColVecType = ColumnVectorOrDecimal<FromType>;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column)) if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
{ {
@ -1127,6 +1208,9 @@ private:
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
ToType hash; ToType hash;
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
if constexpr (std::endian::native == std::endian::little) if constexpr (std::endian::native == std::endian::little)
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i])); hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
else else
@ -1143,6 +1227,14 @@ private:
} }
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column)) else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
{ {
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
}
}
auto value = col_from_const->template getValue<FromType>(); auto value = col_from_const->template getValue<FromType>();
ToType hash; ToType hash;
@ -1158,8 +1250,15 @@ private:
if constexpr (first) if constexpr (first)
vec_to.assign(size, hash); vec_to.assign(size, hash);
else else
{
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash); vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
} }
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
@ -1167,10 +1266,16 @@ private:
} }
template <bool first> template <bool first>
void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{ {
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
for (size_t i = 0, size = column->size(); i < size; ++i) for (size_t i = 0, size = column->size(); i < size; ++i)
{ {
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
StringRef bytes = column->getDataAt(i); StringRef bytes = column->getDataAt(i);
const ToType hash = apply(key, bytes.data, bytes.size); const ToType hash = apply(key, bytes.data, bytes.size);
if constexpr (first) if constexpr (first)
@ -1181,8 +1286,11 @@ private:
} }
template <bool first> template <bool first>
void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{ {
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column)) if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
{ {
const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Chars & data = col_from->getChars();
@ -1192,6 +1300,9 @@ private:
ColumnString::Offset current_offset = 0; ColumnString::Offset current_offset = 0;
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
const ToType hash = apply(key, const ToType hash = apply(key,
reinterpret_cast<const char *>(&data[current_offset]), reinterpret_cast<const char *>(&data[current_offset]),
offsets[i] - current_offset - 1); offsets[i] - current_offset - 1);
@ -1212,6 +1323,9 @@ private:
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n); const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
if constexpr (first) if constexpr (first)
vec_to[i] = hash; vec_to[i] = hash;
@ -1221,6 +1335,14 @@ private:
} }
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
{ {
if constexpr (Keyed)
{
if (!key_cols.is_const)
{
ColumnPtr full_column = col_from_const->convertToFullColumn();
return executeString<first>(key_cols, full_column.get(), vec_to);
}
}
String value = col_from_const->getValue<String>(); String value = col_from_const->getValue<String>();
const ToType hash = apply(key, value.data(), value.size()); const ToType hash = apply(key, value.data(), value.size());
const size_t size = vec_to.size(); const size_t size = vec_to.size();
@ -1228,8 +1350,15 @@ private:
if constexpr (first) if constexpr (first)
vec_to.assign(size, hash); vec_to.assign(size, hash);
else else
{
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
vec_to[i] = combineHashes(key, vec_to[i], hash); vec_to[i] = combineHashes(key, vec_to[i], hash);
}
}
} }
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1237,7 +1366,7 @@ private:
} }
template <bool first> template <bool first>
void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
{ {
const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get(); const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
@ -1249,13 +1378,19 @@ private:
typename ColumnVector<ToType>::Container vec_temp(nested_size); typename ColumnVector<ToType>::Container vec_temp(nested_size);
bool nested_is_first = true; bool nested_is_first = true;
executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first); executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
const size_t size = offsets.size(); const size_t size = offsets.size();
ColumnArray::Offset current_offset = 0; ColumnArray::Offset current_offset = 0;
KeyType key{};
if constexpr (Keyed)
key = Impl::getKey(key_cols, 0);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
if constexpr (Keyed)
if (!key_cols.is_const && i != 0)
key = Impl::getKey(key_cols, i);
ColumnArray::Offset next_offset = offsets[i]; ColumnArray::Offset next_offset = offsets[i];
ToType hash; ToType hash;
@ -1279,7 +1414,7 @@ private:
{ {
/// NOTE: here, of course, you can do without the materialization of the column. /// NOTE: here, of course, you can do without the materialization of the column.
ColumnPtr full_column = col_from_const->convertToFullColumn(); ColumnPtr full_column = col_from_const->convertToFullColumn();
executeArray<first>(key, type, full_column.get(), vec_to); executeArray<first>(key_cols, type, full_column.get(), vec_to);
} }
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1287,7 +1422,7 @@ private:
} }
template <bool first> template <bool first>
void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
{ {
WhichDataType which(from_type); WhichDataType which(from_type);
@ -1295,40 +1430,45 @@ private:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
icolumn->getName(), icolumn->size(), vec_to.size(), getName()); icolumn->getName(), icolumn->size(), vec_to.size(), getName());
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to); if constexpr (Keyed)
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to); if ((!key_cols.is_const && key_cols.size() != vec_to.size())
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to); || (key_cols.is_const && key_cols.size() != 1))
else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to); throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to); if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to); else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to); else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to); else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to); else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to); else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to); else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to); else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to); else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to); else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to); else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to); else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to); else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to); else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to); else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result /// TODO: executeIntType() for Decimal32/64 leads to incompatible result
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to); else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to); else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to); else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to); else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to); else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to); else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
else if (which.isString()) executeString<first>(key, icolumn, vec_to); else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to); else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to); else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
else executeGeneric<first>(key, icolumn, vec_to); else executeGeneric<first>(key_cols, icolumn, vec_to);
} }
void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
{ {
/// Flattening of tuples. /// Flattening of tuples.
if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column)) if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
@ -1337,7 +1477,7 @@ private:
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements(); const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
size_t tuple_size = tuple_columns.size(); size_t tuple_size = tuple_columns.size();
for (size_t i = 0; i < tuple_size; ++i) for (size_t i = 0; i < tuple_size; ++i)
executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
} }
else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column)) else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
{ {
@ -1347,24 +1487,24 @@ private:
for (size_t i = 0; i < tuple_size; ++i) for (size_t i = 0; i < tuple_size; ++i)
{ {
auto tmp = ColumnConst::create(tuple_columns[i], column->size()); auto tmp = ColumnConst::create(tuple_columns[i], column->size());
executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first); executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
} }
} }
else if (const auto * map = checkAndGetColumn<ColumnMap>(column)) else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
{ {
const auto & type_map = assert_cast<const DataTypeMap &>(*type); const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
} }
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column)) else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
{ {
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
} }
else else
{ {
if (is_first) if (is_first)
executeAny<true>(key, type, column, vec_to); executeAny<true>(key_cols, type, column, vec_to);
else else
executeAny<false>(key, type, column, vec_to); executeAny<false>(key_cols, type, column, vec_to);
} }
is_first = false; is_first = false;
@ -1395,30 +1535,33 @@ public:
{ {
auto col_to = ColumnVector<ToType>::create(input_rows_count); auto col_to = ColumnVector<ToType>::create(input_rows_count);
typename ColumnVector<ToType>::Container & vec_to = col_to->getData(); if (input_rows_count != 0)
/// If using a "keyed" algorithm, the first argument is the key and
/// the data starts from the second argument.
/// Otherwise there is no key and all arguments are interpreted as data.
constexpr size_t first_data_argument = Keyed;
if (arguments.size() <= first_data_argument)
{ {
/// Return a fixed random-looking magic number when input is empty typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
KeyType key{}; /// If using a "keyed" algorithm, the first argument is the key and
if constexpr (Keyed) /// the data starts from the second argument.
if (!arguments.empty()) /// Otherwise there is no key and all arguments are interpreted as data.
key = Impl::parseKey(arguments[0]); constexpr size_t first_data_argument = Keyed;
/// The function supports arbitrary number of arguments of arbitrary types. if (arguments.size() <= first_data_argument)
bool is_first_argument = true; {
for (size_t i = first_data_argument; i < arguments.size(); ++i) /// Return a fixed random-looking magic number when input is empty
{ vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
const auto & col = arguments[i]; }
executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
KeyColumnsType key_cols{};
if constexpr (Keyed)
if (!arguments.empty())
key_cols = Impl::parseKeyColumns(arguments[0]);
/// The function supports arbitrary number of arguments of arbitrary types.
bool is_first_argument = true;
for (size_t i = first_data_argument; i < arguments.size(); ++i)
{
const auto & col = arguments[i];
executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
}
} }
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
@ -1450,17 +1593,19 @@ public:
) // DECLARE_MULTITARGET_CODE ) // DECLARE_MULTITARGET_CODE
template <typename Impl, bool Keyed = false, typename KeyType = char> template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType> class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
{ {
public: public:
explicit FunctionAnyHash(ContextPtr context) : selector(context) explicit FunctionAnyHash(ContextPtr context) : selector(context)
{ {
selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>(); selector
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
#if USE_MULTITARGET_CODE #if USE_MULTITARGET_CODE
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>(); selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>(); selector
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
#endif #endif
} }
@ -1696,7 +1841,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
struct NameIntHash64 { static constexpr auto name = "intHash64"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; };
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>; using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>; using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>; using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>; using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
#if USE_SSL #if USE_SSL
@ -1710,8 +1855,10 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>; using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
#endif #endif
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>; using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>; using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>; using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
using FunctionSipHash128ReferenceKeyed
= FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>; using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>; using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>; using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;

View File

@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing)
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}}, .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
.categories{"Hash"} .categories{"Hash"}
}); });
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
.description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
"instead of using a fixed key.",
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
.categories{"Hash"}});
factory.registerFunction<FunctionCityHash64>(); factory.registerFunction<FunctionCityHash64>();
factory.registerFunction<FunctionFarmFingerprint64>(); factory.registerFunction<FunctionFarmFingerprint64>();
factory.registerFunction<FunctionFarmHash64>(); factory.registerFunction<FunctionFarmHash64>();

View File

@ -39,6 +39,9 @@ struct HasTokenImpl
if (start_pos != nullptr) if (start_pos != nullptr)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name);
if (pattern.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token");
if (haystack_offsets.empty()) if (haystack_offsets.empty())
return; return;

View File

@ -7,8 +7,8 @@
namespace DB namespace DB
{ {
/** URL processing functions. See implementation in separate .cpp files. /** These helpers are used by URL processing functions. See implementation in separate .cpp files.
* All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons. * All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
* *
* Functions for extraction parts of URL. * Functions for extraction parts of URL.
* If URL has nothing like, then empty string is returned. * If URL has nothing like, then empty string is returned.
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{ {
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
} }
}; };
@ -156,7 +156,7 @@ struct CutSubstringImpl
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{ {
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
} }
}; };

View File

@ -5,7 +5,7 @@
namespace DB namespace DB
{ {
/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform. /** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
* *
* Depending on what overloads of Transform::execute() are available, when called with DateTime64 value, * Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
* invokes Transform::execute() with either: * invokes Transform::execute() with either:
@ -80,7 +80,10 @@ public:
} }
else else
{ {
const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
if (t.value < 0 && components.fractional)
--components.whole;
return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...); return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
} }
} }

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/URL/FunctionsURL.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/StringHelpers.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h> #include <Columns/ColumnFixedString.h>

View File

@ -1,7 +1,7 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include "FunctionsURL.h"
namespace DB namespace DB
{ {

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB namespace DB
{ {

View File

@ -1,7 +1,7 @@
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include <Functions/URL/FunctionsURL.h> #include <Functions/StringHelpers.h>
namespace DB namespace DB
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
} }
} }

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h" #include <Functions/StringHelpers.h>
#include "path.h" #include "path.h"
#include <base/find_symbols.h> #include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <Functions/URL/FunctionsURL.h> #include <Functions/StringHelpers.h>
namespace DB namespace DB

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h> #include <Functions/FunctionStringToString.h>
#include "FunctionsURL.h" #include <Functions/StringHelpers.h>
#include "path.h" #include "path.h"
#include <base/find_symbols.h> #include <base/find_symbols.h>

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "FunctionsURL.h"
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Functions/StringHelpers.h>
namespace DB namespace DB
@ -54,4 +54,3 @@ struct ExtractProtocol
}; };
} }

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB namespace DB

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "FunctionsURL.h"
#include <base/find_symbols.h> #include <base/find_symbols.h>
#include <Functions/StringHelpers.h>
namespace DB namespace DB
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
}; };
} }

View File

@ -174,12 +174,13 @@ public:
{ {
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y)) auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x)); - static_cast<Int64>(transform_x.execute(x, timezone_x));
DateLUTImpl::DateTimeComponents a_comp; DateTimeComponentsWithFractionalPart a_comp;
DateLUTImpl::DateTimeComponents b_comp; DateTimeComponentsWithFractionalPart b_comp;
Int64 adjust_value; Int64 adjust_value;
auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x); auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y); auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
if (x_seconds <= y_seconds)
if (x_microseconds <= y_microseconds)
{ {
a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x); a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y); b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
@ -192,14 +193,16 @@ public:
adjust_value = 1; adjust_value = 1;
} }
if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>) if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
{ {
if ((a_comp.date.month > b_comp.date.month) if ((a_comp.date.month > b_comp.date.month)
|| ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day) || ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
))))) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
@ -210,8 +213,9 @@ public:
|| ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day) || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
))))) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
@ -219,8 +223,9 @@ public:
if ((a_comp.date.day > b_comp.date.day) if ((a_comp.date.day > b_comp.date.day)
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
))) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
@ -230,25 +235,44 @@ public:
if ((x_day_of_week > y_day_of_week) if ((x_day_of_week > y_day_of_week)
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
{ {
if ((a_comp.time.hour > b_comp.time.hour) if ((a_comp.time.hour > b_comp.time.hour)
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
{ {
if ((a_comp.time.minute > b_comp.time.minute) if ((a_comp.time.minute > b_comp.time.minute)
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
res += adjust_value; res += adjust_value;
} }
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>) else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
{ {
if (a_comp.time.second > b_comp.time.second) if ((a_comp.time.second > b_comp.time.second)
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
{
if ((a_comp.millisecond > b_comp.millisecond)
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
res += adjust_value;
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
{
if (a_comp.microsecond > b_comp.microsecond)
res += adjust_value; res += adjust_value;
} }
return res; return res;
@ -373,6 +397,10 @@ public:
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "second" || unit == "ss" || unit == "s") else if (unit == "second" || unit == "ss" || unit == "s")
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData()); impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "millisecond" || unit == "ms")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "microsecond" || unit == "us" || unit == "u")
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
else else
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} does not support '{}' unit", getName(), unit); "Function {} does not support '{}' unit", getName(), unit);

View File

@ -0,0 +1,42 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>
namespace DB
{
struct FirstLine
{
static size_t getReserveLengthForElement() { return 16; }
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;
const Pos end = data + size;
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
res_size = pos - data;
}
};
struct NameFirstLine
{
static constexpr auto name = "firstLine";
};
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
REGISTER_FUNCTION(FirstLine)
{
factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
.description = "Returns first line of a multi-line string.",
.syntax = "firstLine(string)",
.arguments = {{.name = "string", .description = "The string to process."}},
.returned_value = {"The first line of the string or the whole string if there is no line separators."},
.examples = {
{.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
{.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
}});
}
}

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
} }
namespace namespace
@ -108,6 +109,12 @@ public:
/// S2 acceptes point as (latitude, longitude) /// S2 acceptes point as (latitude, longitude)
S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon); S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon);
if (!lat_lng.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive"
"and the longitude is between -180 and 180 degrees inclusive.");
S2CellId id(lat_lng); S2CellId id(lat_lng);
dst_data[row] = id.id(); dst_data[row] = id.id();

View File

@ -119,7 +119,7 @@ public:
if (!lhs_array->hasEqualOffsets(*rhs_array)) if (!lhs_array->hasEqualOffsets(*rhs_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array offsets", "The argument 2 and argument {} of function {} have different array offsets",
i + 1, i + 1,
getName()); getName());

View File

@ -138,6 +138,7 @@ private:
REGISTER_FUNCTION(Now) REGISTER_FUNCTION(Now)
{ {
factory.registerFunction<NowOverloadResolver>({}, FunctionFactory::CaseInsensitive); factory.registerFunction<NowOverloadResolver>({}, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::CaseInsensitive);
} }
} }

View File

@ -114,13 +114,18 @@ public:
const auto hi = S2CellId(data_hi[row]); const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]); const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid()) S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
if (!point.is_valid()) if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
rect.AddPoint(point.ToPoint()); rect.AddPoint(point.ToPoint());

View File

@ -107,13 +107,18 @@ public:
const auto hi = S2CellId(data_hi[row]); const auto hi = S2CellId(data_hi[row]);
const auto point = S2CellId(data_point[row]); const auto point = S2CellId(data_point[row]);
if (!lo.is_valid() || !hi.is_valid()) S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
if (!point.is_valid()) if (!point.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
"and the longitude is between -180 and 180 degrees inclusive.");
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng()); if (!rect.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
dst_data.emplace_back(rect.Contains(point.ToLatLng())); dst_data.emplace_back(rect.Contains(point.ToLatLng()));
} }

View File

@ -128,15 +128,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]); const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]); const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be.");
S2LatLngRect rect_intersection = rect1.Intersection(rect2); S2LatLngRect rect_intersection = rect1.Intersection(rect2);
vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id()); vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id());

View File

@ -126,15 +126,15 @@ public:
const auto lo2 = S2CellId(data_lo2[row]); const auto lo2 = S2CellId(data_lo2[row]);
const auto hi2 = S2CellId(data_hi2[row]); const auto hi2 = S2CellId(data_hi2[row]);
if (!lo1.is_valid() || !hi1.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
if (!lo2.is_valid() || !hi2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng()); S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng()); S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
if (!rect1.is_valid() || !rect2.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
"Also, if either the latitude or longitude bound is empty then both must be. ");
S2LatLngRect rect_union = rect1.Union(rect2); S2LatLngRect rect_union = rect1.Union(rect2);
vec_res_first.emplace_back(S2CellId(rect_union.lo()).id()); vec_res_first.emplace_back(S2CellId(rect_union.lo()).id());

View File

@ -97,7 +97,7 @@ public:
const auto id = S2CellId(data_id[row]); const auto id = S2CellId(data_id[row]);
if (!id.is_valid()) if (!id.is_valid())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "CellId is invalid.");
S2Point point = id.ToPoint(); S2Point point = id.ToPoint();
S2LatLng ll(point); S2LatLng ll(point);

View File

@ -10,7 +10,6 @@
#include <Functions/DateTimeTransforms.h> #include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/TransformDateTime64.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>

View File

@ -86,6 +86,8 @@ public:
REGISTER_FUNCTION(Today) REGISTER_FUNCTION(Today)
{ {
factory.registerFunction<TodayOverloadResolver>(); factory.registerFunction<TodayOverloadResolver>();
factory.registerAlias("current_date", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("curdate", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
} }
} }

View File

@ -16,19 +16,15 @@
#include <DataTypes/DataTypeSet.h> #include <DataTypes/DataTypeSet.h>
#include <DataTypes/DataTypeFunction.h> #include <DataTypes/DataTypeFunction.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h> #include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h> #include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypesDecimal.h> #include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnSet.h> #include <Columns/ColumnSet.h>
#include <Storages/StorageSet.h> #include <Storages/StorageSet.h>
@ -47,7 +43,6 @@
#include <Interpreters/ExpressionActions.h> #include <Interpreters/ExpressionActions.h>
#include <Interpreters/misc.h> #include <Interpreters/misc.h>
#include <Interpreters/ActionsVisitor.h> #include <Interpreters/ActionsVisitor.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/Set.h> #include <Interpreters/Set.h>
#include <Interpreters/evaluateConstantExpression.h> #include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/convertFieldToType.h> #include <Interpreters/convertFieldToType.h>
@ -61,6 +56,7 @@
#include <Interpreters/InterpreterSelectQueryAnalyzer.h> #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
namespace DB namespace DB
{ {
@ -715,7 +711,7 @@ bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
node->as<ASTExpressionList>()) node->as<ASTExpressionList>())
return false; return false;
/// Do not go to FROM, JOIN, UNION. /// Do not go to FROM, JOIN, UNION
if (child->as<ASTTableExpression>() || if (child->as<ASTTableExpression>() ||
child->as<ASTSelectQuery>()) child->as<ASTSelectQuery>())
return false; return false;

View File

@ -97,6 +97,10 @@ UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const
for (const auto & setting : settings.allChanged()) for (const auto & setting : settings.allChanged())
{ {
/// We don't consider this setting because it is only for deduplication,
/// which means we can put two inserts with different tokens in the same block safely.
if (setting.getName() == "insert_deduplication_token")
continue;
siphash.update(setting.getName()); siphash.update(setting.getName());
applyVisitor(FieldVisitorHash(siphash), setting.getValue()); applyVisitor(FieldVisitorHash(siphash), setting.getValue());
} }
@ -111,9 +115,10 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
return query_str == other.query_str && settings == other.settings; return query_str == other.query_str && settings == other.settings;
} }
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_) AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, const String & async_dedup_token_, MemoryTracker * user_memory_tracker_)
: bytes(std::move(bytes_)) : bytes(std::move(bytes_))
, query_id(std::move(query_id_)) , query_id(std::move(query_id_))
, async_dedup_token(async_dedup_token_)
, user_memory_tracker(user_memory_tracker_) , user_memory_tracker(user_memory_tracker_)
, create_time(std::chrono::system_clock::now()) , create_time(std::chrono::system_clock::now())
{ {
@ -227,7 +232,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
/// to avoid buffering of huge amount of data in memory. /// to avoid buffering of huge amount of data in memory.
auto read_buf = getReadBufferFromASTInsertQuery(query); auto read_buf = getReadBufferFromASTInsertQuery(query);
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {}); LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});
WriteBufferFromString write_buf(bytes); WriteBufferFromString write_buf(bytes);
copyData(limit_buf, write_buf); copyData(limit_buf, write_buf);
@ -253,7 +258,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
if (auto quota = query_context->getQuota()) if (auto quota = query_context->getQuota())
quota->used(QuotaType::WRITTEN_BYTES, bytes.size()); quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker()); auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), settings.insert_deduplication_token, CurrentThread::getUserMemoryTracker());
InsertQuery key{query, settings}; InsertQuery key{query, settings};
InsertDataPtr data_to_process; InsertDataPtr data_to_process;
@ -515,7 +520,7 @@ try
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform)); StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
std::unique_ptr<ReadBuffer> last_buffer; std::unique_ptr<ReadBuffer> last_buffer;
auto chunk_info = std::make_shared<ChunkOffsets>(); auto chunk_info = std::make_shared<AsyncInsertInfo>();
for (const auto & entry : data->entries) for (const auto & entry : data->entries)
{ {
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes); auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
@ -524,6 +529,7 @@ try
size_t num_rows = executor.execute(*buffer); size_t num_rows = executor.execute(*buffer);
total_rows += num_rows; total_rows += num_rows;
chunk_info->offsets.push_back(total_rows); chunk_info->offsets.push_back(total_rows);
chunk_info->tokens.push_back(entry->async_dedup_token);
/// Keep buffer, because it still can be used /// Keep buffer, because it still can be used
/// in destructor, while resetting buffer at next iteration. /// in destructor, while resetting buffer at next iteration.

Some files were not shown because too many files have changed in this diff Show More