mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into async-connect-to-multiple-ips
This commit is contained in:
commit
b08c72b28d
216
.github/workflows/master.yml
vendored
216
.github/workflows/master.yml
vendored
@ -2870,6 +2870,216 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan0:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=0
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan1:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=1
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan2:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=2
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan3:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=3
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan4:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=4
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan5:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=5
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsTsan0:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
@ -3963,6 +4173,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
6
.github/workflows/pull_request.yml
vendored
6
.github/workflows/pull_request.yml
vendored
@ -5099,6 +5099,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
@ -4,6 +4,8 @@ services:
|
||||
kafka_zookeeper:
|
||||
image: zookeeper:3.4.9
|
||||
hostname: kafka_zookeeper
|
||||
ports:
|
||||
- 2181:2181
|
||||
environment:
|
||||
ZOO_MY_ID: 1
|
||||
ZOO_PORT: 2181
|
||||
@ -15,15 +17,14 @@ services:
|
||||
image: confluentinc/cp-kafka:5.2.0
|
||||
hostname: kafka1
|
||||
ports:
|
||||
- ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
|
||||
- ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
|
||||
environment:
|
||||
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
|
||||
KAFKA_ADVERTISED_HOST_NAME: kafka1
|
||||
KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
|
||||
KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
depends_on:
|
||||
@ -35,13 +36,38 @@ services:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
hostname: schema-registry
|
||||
ports:
|
||||
- ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
|
||||
- ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
|
||||
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
restart: always
|
||||
security_opt:
|
||||
- label:disable
|
||||
|
||||
schema-registry-auth:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
hostname: schema-registry-auth
|
||||
ports:
|
||||
- ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
|
||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
|
||||
SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
|
||||
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
|
||||
volumes:
|
||||
- ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
restart: always
|
||||
security_opt:
|
||||
- label:disable
|
||||
|
@ -76,6 +76,7 @@ The supported formats are:
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
@ -472,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
|
||||
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
|
||||
@ -1515,6 +1517,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
|
||||
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
|
||||
:::
|
||||
|
||||
## RowBinaryWithDefaults {#rowbinarywithdefaults}
|
||||
|
||||
Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if default value should be used.
|
||||
|
||||
Examples:
|
||||
|
||||
```sql
|
||||
:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
|
||||
|
||||
┌──x─┬─y─┐
|
||||
│ 42 │ 1 │
|
||||
└────┴───┘
|
||||
```
|
||||
|
||||
For column `x` there is only one byte `01` that indicates that default value should be used and no other data after this byte is provided.
|
||||
For column `y` data starts with byte `00` that indicates that column has actual value that should be read from the subsequent data `01000000`.
|
||||
|
||||
## RowBinary format settings {#row-binary-format-settings}
|
||||
|
||||
- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.
|
||||
|
@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
|
||||
```
|
||||
|
||||
:::note ALL
|
||||
`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse.
|
||||
Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command.
|
||||
:::
|
||||
|
||||
## Background
|
||||
|
@ -989,6 +989,28 @@ Result
|
||||
a b
|
||||
```
|
||||
|
||||
### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
|
||||
|
||||
Allows setting a default value for a column when CSV field deserialization fails on a bad value.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
|
||||
echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
|
||||
./clickhouse local -q "select * from test_tbl"
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
a 0 1970-01-01
|
||||
```
|
||||
|
||||
## Values format settings {#values-format-settings}
|
||||
|
||||
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
|
||||
@ -1325,6 +1347,17 @@ Default value: 0.
|
||||
|
||||
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
|
||||
|
||||
Format:
|
||||
``` text
|
||||
http://[user:password@]machine[:port]
|
||||
```
|
||||
|
||||
Examples:
|
||||
``` text
|
||||
http://registry.example.com:8081
|
||||
http://admin:secret@registry.example.com:8081
|
||||
```
|
||||
|
||||
Default value: `Empty`.
|
||||
|
||||
### output_format_avro_codec {#output_format_avro_codec}
|
||||
|
@ -4524,6 +4524,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta
|
||||
|
||||
### Placeholders
|
||||
|
||||
- `%a` — Full original filename (e.g., "sample.csv").
|
||||
- `%f` — Original filename without extension (e.g., "sample").
|
||||
- `%e` — Original file extension with dot (e.g., ".csv").
|
||||
- `%t` — Timestamp (in microseconds).
|
||||
|
@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
|
||||
## age
|
||||
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
|
||||
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
|
||||
|
||||
For an alternative to `age`, see function `date_diff`.
|
||||
@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
|
@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 (#hash_functions-siphash64)
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than
|
||||
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
||||
|
@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
||||
## firstLine
|
||||
|
||||
Returns the first line from a multi-line string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
firstLine(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The first line of the input value or the whole value if there is no line
|
||||
separators. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
select firstLine('foo\nbar\nbaz');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─firstLine('foo\nbar\nbaz')─┐
|
||||
│ foo │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
|
||||
:::
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
|
||||
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
|
||||
|
@ -5,7 +5,27 @@ sidebar_label: WITH
|
||||
|
||||
# WITH Clause
|
||||
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of the `SELECT` query. Named subqueries can be included in the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
|
||||
Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case.
|
||||
|
||||
An example of such behavior is shown below:
|
||||
``` sql
|
||||
with cte_numbers as
|
||||
(
|
||||
select
|
||||
num
|
||||
from generateRandom('num UInt64', NULL)
|
||||
limit 1000000
|
||||
)
|
||||
select
|
||||
count()
|
||||
from cte_numbers
|
||||
where num in (select num from cte_numbers)
|
||||
```
|
||||
If CTEs were to pass exactly the results and not just a piece of code, you would always see `1000000`.
|
||||
|
||||
However, because we refer to `cte_numbers` twice, random numbers are generated each time and, accordingly, we see different random results: `280501, 392454, 261636, 196227` and so on.
|
||||
|
||||
## Syntax
|
||||
|
||||
|
@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
|
||||
|
@ -4201,6 +4201,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
|
||||
### Шаблон
|
||||
Шаблон поддерживает следующие виды плейсхолдеров:
|
||||
|
||||
- `%a` — Полное исходное имя файла (например "sample.csv").
|
||||
- `%f` — Исходное имя файла без расширения (например "sample").
|
||||
- `%e` — Оригинальное расширение файла с точкой (например ".csv").
|
||||
- `%t` — Текущее время (в микросекундах).
|
||||
|
@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
|
||||
## age
|
||||
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
|
||||
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
|
||||
|
||||
**Синтаксис**
|
||||
@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
|
@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00
|
||||
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
|
||||
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
|
||||
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
|
||||
|
||||
## firstLine
|
||||
|
||||
Возвращает первую строку в многострочном тексте.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
firstLine(val)
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `val` - текст для обработки. [String](../data-types/string.md)
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Первая строка текста или весь текст, если переносы строк отсутствуют.
|
||||
|
||||
Тип: [String](../data-types/string.md)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
select firstLine('foo\nbar\nbaz');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```result
|
||||
┌─firstLine('foo\nbar\nbaz')─┐
|
||||
│ foo │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
|
||||
Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view).
|
||||
:::
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
`LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md).
|
||||
|
||||
|
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
|
||||
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
|
||||
- `?` — заменяет ровно один любой символ.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
|
||||
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
|
||||
|
||||
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
|
||||
|
@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
|
||||
可能的值:
|
||||
|
||||
- `microsecond`
|
||||
- `millisecond`
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
|
@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中
|
||||
使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
|
||||
实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。
|
||||
|
@ -887,6 +887,7 @@ try
|
||||
#endif
|
||||
|
||||
global_context->setRemoteHostFilter(config());
|
||||
global_context->setHTTPHeaderFilter(config());
|
||||
|
||||
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
|
||||
fs::path path = path_str;
|
||||
@ -1200,6 +1201,7 @@ try
|
||||
}
|
||||
|
||||
global_context->setRemoteHostFilter(*config);
|
||||
global_context->setHTTPHeaderFilter(*config);
|
||||
|
||||
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
|
||||
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
|
||||
|
@ -866,6 +866,14 @@
|
||||
-->
|
||||
<!--</remote_url_allow_hosts>-->
|
||||
|
||||
<!-- The list of HTTP headers forbidden to use in HTTP-related storage engines and table functions.
|
||||
If this section is not present in configuration, all headers are allowed.
|
||||
-->
|
||||
<!-- <http_forbid_headers>
|
||||
<header>exact_header</header>
|
||||
<header_regexp>(?i)(case_insensitive_header)</header_regexp>
|
||||
</http_forbid_headers> -->
|
||||
|
||||
<!-- If element has 'incl' attribute, then the corresponding substitution from another file will be used for its value.
|
||||
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
|
||||
Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.
|
||||
|
204
rust/skim/Cargo.lock
generated
204
rust/skim/Cargo.lock
generated
@ -42,17 +42,6 @@ version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi 0.1.19",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
@ -104,31 +93,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_lex",
|
||||
"indexmap",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codespan-reporting"
|
||||
version = "0.11.1"
|
||||
@ -214,9 +178,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cxx"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8"
|
||||
checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cxxbridge-flags",
|
||||
@ -226,9 +190,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cxx-build"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb"
|
||||
checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"codespan-reporting",
|
||||
@ -236,24 +200,24 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"scratch",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-flags"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8"
|
||||
checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-macro"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d"
|
||||
checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -359,19 +323,6 @@ version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"humantime",
|
||||
"log",
|
||||
"regex",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@ -398,32 +349,11 @@ dependencies = [
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
@ -454,16 +384,6 @@ version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "link-cplusplus"
|
||||
version = "1.0.8"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
|
||||
checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
@ -564,7 +484,7 @@ version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||
dependencies = [
|
||||
"hermit-abi 0.3.1",
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
@ -574,12 +494,6 @@ version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
version = "0.1.0"
|
||||
@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
version = "1.0.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@ -648,9 +562,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.8.4"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
|
||||
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@ -659,39 +585,33 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.2"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.12"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
|
||||
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scratch"
|
||||
version = "1.0.5"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1"
|
||||
checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.164"
|
||||
version = "1.0.171"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
|
||||
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
|
||||
|
||||
[[package]]
|
||||
name = "skim"
|
||||
@ -699,23 +619,19 @@ version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"beef",
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"clap",
|
||||
"crossbeam",
|
||||
"defer-drop",
|
||||
"derive_builder",
|
||||
"env_logger",
|
||||
"fuzzy-matcher",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"nix 0.25.1",
|
||||
"rayon",
|
||||
"regex",
|
||||
"shlex",
|
||||
"time 0.3.22",
|
||||
"time 0.3.23",
|
||||
"timer",
|
||||
"tuikit",
|
||||
"unicode-width",
|
||||
@ -741,9 +657,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
version = "2.0.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -770,30 +686,24 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.40"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
|
||||
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.40"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
||||
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -819,9 +729,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.22"
|
||||
version = "0.3.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd"
|
||||
checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"time-core",
|
||||
@ -858,9 +768,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.9"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
|
||||
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
@ -928,7 +838,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
skim = "0.10.2"
|
||||
skim = { version = "0.10.2", default-features = false }
|
||||
cxx = "1.0.83"
|
||||
term = "0.7.0"
|
||||
|
||||
|
@ -1,10 +1,25 @@
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArrayMoving.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -13,11 +28,186 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -79,7 +269,7 @@ AggregateFunctionPtr createAggregateFunctionMoving(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
|
@ -1,207 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE
|
||||
|
||||
}
|
@ -319,24 +319,21 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");
|
||||
|
||||
Packet packet;
|
||||
try
|
||||
{
|
||||
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
|
||||
|
||||
try
|
||||
packet = current_connection->receivePacket();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
|
||||
{
|
||||
packet = current_connection->receivePacket();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::UNKNOWN_PACKET_FROM_SERVER)
|
||||
{
|
||||
/// Exception may happen when packet is received, e.g. when got unknown packet.
|
||||
/// In this case, invalidate replica, so that we would not read from it anymore.
|
||||
current_connection->disconnect();
|
||||
invalidateReplica(state);
|
||||
}
|
||||
throw;
|
||||
/// Exception may happen when packet is received, e.g. when got unknown packet.
|
||||
/// In this case, invalidate replica, so that we would not read from it anymore.
|
||||
current_connection->disconnect();
|
||||
invalidateReplica(state);
|
||||
}
|
||||
throw;
|
||||
}
|
||||
|
||||
switch (packet.type)
|
||||
|
@ -848,6 +848,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
|
||||
|
||||
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
|
||||
{
|
||||
if (ast == nullptr)
|
||||
return;
|
||||
|
||||
auto remove_fuzzed_table = [this](const auto & table_name)
|
||||
{
|
||||
auto pos = table_name.find("__fuzz_");
|
||||
|
@ -5,7 +5,6 @@ namespace DB
|
||||
|
||||
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
|
||||
{
|
||||
createFiber();
|
||||
}
|
||||
|
||||
void AsyncTaskExecutor::resume()
|
||||
@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
|
||||
if (routine_is_finished)
|
||||
return;
|
||||
|
||||
/// Create fiber lazily on first resume() call.
|
||||
if (!fiber)
|
||||
createFiber();
|
||||
|
||||
if (!checkBeforeTaskResume())
|
||||
return;
|
||||
|
||||
@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
|
||||
return;
|
||||
|
||||
resumeUnlocked();
|
||||
|
||||
/// Destroy fiber when it's finished.
|
||||
if (routine_is_finished)
|
||||
destroyFiber();
|
||||
|
||||
if (exception)
|
||||
processException(exception);
|
||||
}
|
||||
@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
|
||||
void AsyncTaskExecutor::restart()
|
||||
{
|
||||
std::lock_guard guard(fiber_lock);
|
||||
if (fiber)
|
||||
if (!routine_is_finished)
|
||||
destroyFiber();
|
||||
createFiber();
|
||||
routine_is_finished = false;
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
|
||||
|
@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
|
||||
// Define placeholders and their corresponding values
|
||||
std::map<String, String> placeholders =
|
||||
{
|
||||
{"%a", filename},
|
||||
{"%f", file_base},
|
||||
{"%e", file_ext},
|
||||
{"%t", timestamp},
|
||||
@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
|
||||
bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
|
||||
{
|
||||
// Check if the rule contains invalid placeholders
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$");
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
|
||||
if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace valid placeholders with empty strings and count remaining percentage signs.
|
||||
String replaced_rule = rule;
|
||||
boost::replace_all(replaced_rule, "%a", "");
|
||||
boost::replace_all(replaced_rule, "%f", "");
|
||||
boost::replace_all(replaced_rule, "%e", "");
|
||||
boost::replace_all(replaced_rule, "%t", "");
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
/**
|
||||
* The FileRenamer class provides functionality for renaming files based on given pattern with placeholders
|
||||
* The supported placeholders are:
|
||||
* %a - Full original file name ("sample.csv")
|
||||
* %f - Original filename without extension ("sample")
|
||||
* %e - Original file extension with dot (".csv")
|
||||
* %t - Timestamp (in microseconds)
|
||||
|
56
src/Common/HTTPHeaderFilter.cpp
Normal file
56
src/Common/HTTPHeaderFilter.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
#include <Common/HTTPHeaderFilter.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void HTTPHeaderFilter::checkHeaders(const HTTPHeaderEntries & entries) const
|
||||
{
|
||||
std::lock_guard guard(mutex);
|
||||
|
||||
for (const auto & entry : entries)
|
||||
{
|
||||
if (forbidden_headers.contains(entry.name))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
|
||||
"see <http_forbid_headers>", entry.name);
|
||||
|
||||
for (const auto & header_regex : forbidden_headers_regexp)
|
||||
if (re2::RE2::FullMatch(entry.name, header_regex))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
|
||||
"see <http_forbid_headers>", entry.name);
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces the forbidden-header lists with the contents of the
/// `http_forbid_headers` section of `config`.
///
/// Inside that section, keys starting with `header_regexp` contribute a regular
/// expression and other keys starting with `header` contribute an exact header
/// name; keys with any other prefix are ignored. If the section is absent,
/// both lists end up empty.
///
/// The previous contents are always discarded, so calling this again with a
/// changed configuration neither keeps entries that were removed from the
/// config nor accumulates duplicate regexps.
void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    std::lock_guard guard(mutex);

    /// Build the new lists aside and swap them in at the end, so that an exception
    /// thrown while reading the config leaves the current lists untouched.
    std::unordered_set<std::string> new_forbidden_headers;
    std::vector<std::string> new_forbidden_headers_regexp;

    if (config.has("http_forbid_headers"))
    {
        std::vector<std::string> keys;
        config.keys("http_forbid_headers", keys);

        for (const auto & key : keys)
        {
            /// `header_regexp` has to be tested first because it also starts with `header`.
            if (startsWith(key, "header_regexp"))
                new_forbidden_headers_regexp.push_back(config.getString("http_forbid_headers." + key));
            else if (startsWith(key, "header"))
                new_forbidden_headers.insert(config.getString("http_forbid_headers." + key));
        }
    }

    forbidden_headers.swap(new_forbidden_headers);
    forbidden_headers_regexp.swap(new_forbidden_headers_regexp);
}
|
||||
|
||||
}
|
27
src/Common/HTTPHeaderFilter.h
Normal file
27
src/Common/HTTPHeaderFilter.h
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/HTTPHeaderEntries.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class HTTPHeaderFilter
|
||||
{
|
||||
public:
|
||||
|
||||
void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);
|
||||
void checkHeaders(const HTTPHeaderEntries & entries) const;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> forbidden_headers;
|
||||
std::vector<std::string> forbidden_headers_regexp;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
};
|
||||
|
||||
}
|
@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
|
||||
initializeDisks(config);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Decides whether the disk configured under `disk_config_prefix` is accepted.
///
/// The disk type is read from `<disk_config_prefix>.type` and defaults to "local".
/// Only "s3", "s3_plain" and "local" are accepted; for any other type an INFO
/// message is logged and `false` is returned.
bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix)
{
    using namespace std::literals;
    static constexpr std::array supported_disk_types{"s3"sv, "s3_plain"sv, "local"sv};

    const auto disk_type = config.getString(disk_config_prefix + ".type", "local");

    for (const auto supported_type : supported_disk_types)
    {
        if (disk_type == supported_type)
            return true;
    }

    LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type);
    return false;
}
|
||||
|
||||
}
|
||||
|
||||
void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance());
|
||||
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);
|
||||
|
||||
log_storage = getLogsPathFromConfig(config);
|
||||
|
||||
|
@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)
|
||||
|
||||
/** Components of DecimalX value:
|
||||
* whole - represents whole part of decimal, can be negative or positive.
|
||||
* fractional - for fractional part of decimal, always positive.
|
||||
* fractional - for fractional part of decimal.
|
||||
*
|
||||
* 0.123 represents 0 / 0.123
|
||||
* -0.123 represents 0 / -0.123
|
||||
* -1.123 represents -1 / 0.123
|
||||
*/
|
||||
template <typename DecimalType>
|
||||
struct DecimalComponents
|
||||
|
@ -577,6 +577,7 @@ class IColumn;
|
||||
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
|
||||
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
|
||||
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
|
||||
M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
|
||||
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
|
||||
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
|
||||
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
|
||||
@ -736,7 +737,7 @@ class IColumn;
|
||||
M(String, workload, "default", "Name of workload to be used to access resources", 0) \
|
||||
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
|
||||
\
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
\
|
||||
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
|
||||
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
|
||||
@ -774,6 +775,7 @@ class IColumn;
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
|
||||
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
@ -872,6 +874,7 @@ class IColumn;
|
||||
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
|
||||
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
|
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
|
||||
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
|
||||
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
|
||||
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
|
||||
|
@ -3,6 +3,7 @@
|
||||
#if USE_MYSQL
|
||||
|
||||
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
#include <cstdlib>
|
||||
#include <random>
|
||||
#include <string_view>
|
||||
@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
|
||||
}
|
||||
}
|
||||
|
||||
static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
|
||||
{
|
||||
String table_name;
|
||||
String database_name;
|
||||
if (ddl.empty()) return std::make_tuple(database_name, table_name);
|
||||
|
||||
bool parse_failed = false;
|
||||
Tokens tokens(ddl.data(), ddl.data() + ddl.size());
|
||||
IParser::Pos pos(tokens, 0);
|
||||
Expected expected;
|
||||
ASTPtr res;
|
||||
ASTPtr table;
|
||||
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF EXISTS").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("TABLE").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
parse_failed = true;
|
||||
}
|
||||
if (!parse_failed)
|
||||
{
|
||||
if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
|
||||
{
|
||||
database_name = table_id.database_name;
|
||||
table_name = table_id.table_name;
|
||||
}
|
||||
}
|
||||
return std::make_tuple(database_name, table_name);
|
||||
}
|
||||
|
||||
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
|
||||
ContextPtr context_,
|
||||
const String & database_name_,
|
||||
@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
|
||||
String query = query_event.query;
|
||||
if (!materialized_tables_list.empty())
|
||||
{
|
||||
auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query);
|
||||
|
||||
if (!ddl_table_name.empty())
|
||||
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
|
||||
if (!table_id.table_name.empty())
|
||||
{
|
||||
ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name;
|
||||
if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
|
||||
if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
|
||||
{
|
||||
LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query);
|
||||
LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
185
src/Databases/MySQL/tests/gtest_try_parse_table_id_from_ddl.cpp
Normal file
185
src/Databases/MySQL/tests/gtest_try_parse_table_id_from_ddl.cpp
Normal file
@ -0,0 +1,185 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
struct ParseTableIDFromDDLTestCase
|
||||
{
|
||||
String query;
|
||||
String database_name;
|
||||
String table_name;
|
||||
|
||||
ParseTableIDFromDDLTestCase(
|
||||
const String & query_,
|
||||
const String & database_name_,
|
||||
const String & table_name_)
|
||||
: query(query_)
|
||||
, database_name(database_name_)
|
||||
, table_name(table_name_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
|
||||
{
|
||||
return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
|
||||
}
|
||||
|
||||
class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
|
||||
{
|
||||
};
|
||||
|
||||
TEST_P(ParseTableIDFromDDLTest, parse)
|
||||
{
|
||||
const auto & [query, expected_database_name, expected_table_name] = GetParam();
|
||||
auto table_id = tryParseTableIDFromDDL(query, "default");
|
||||
EXPECT_EQ(expected_database_name, table_id.database_name);
|
||||
EXPECT_EQ(expected_table_name, table_id.table_name);
|
||||
}
|
||||
|
||||
/// Each case is {query, expected database name, expected table name}.
/// "default" is the fallback database passed by the test; empty names mean nothing is extracted.
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
    /// Not a supported DDL statement.
    {"SELECT * FROM db.table", "", ""},

    /// CREATE [TEMPORARY] TABLE [IF NOT EXISTS]
    {"CREATE TEMPORARY TABLE db.table", "db", "table"},
    {"CREATE TEMPORARY TABLE IF NOT EXISTS db.table", "db", "table"},
    {"CREATE TEMPORARY TABLE table", "default", "table"},
    {"CREATE TEMPORARY TABLE IF NOT EXISTS table", "default", "table"},
    {"CREATE TABLE db.table", "db", "table"},
    {"CREATE TABLE IF NOT EXISTS db.table", "db", "table"},
    {"CREATE TABLE table", "default", "table"},
    {"CREATE TABLE IF NOT EXISTS table", "default", "table"},

    /// ALTER TABLE
    {"ALTER TABLE db.table", "db", "table"},
    {"ALTER TABLE table", "default", "table"},

    /// DROP [TEMPORARY] TABLE [IF EXISTS]
    {"DROP TABLE db.table", "db", "table"},
    {"DROP TABLE IF EXISTS db.table", "db", "table"},
    {"DROP TABLE table", "default", "table"},
    {"DROP TABLE IF EXISTS table", "default", "table"},
    {"DROP TEMPORARY TABLE db.table", "db", "table"},
    {"DROP TEMPORARY TABLE IF EXISTS db.table", "db", "table"},
    {"DROP TEMPORARY TABLE table", "default", "table"},
    {"DROP TEMPORARY TABLE IF EXISTS table", "default", "table"},

    /// TRUNCATE [TABLE]
    {"TRUNCATE db.table", "db", "table"},
    {"TRUNCATE TABLE db.table", "db", "table"},
    {"TRUNCATE table1", "default", "table1"},
    {"TRUNCATE TABLE table", "default", "table"},

    /// RENAME TABLE
    {"RENAME TABLE db.table", "db", "table"},
    {"RENAME TABLE table", "default", "table"},

    /// Statements that do not name a table, or are not valid SQL at all.
    {"DROP DATABASE db", "", ""},
    {"DROP DATA`BASE db", "", ""},
    {"NOT A SQL", "", ""},
}));
|
44
src/Databases/MySQL/tryParseTableIDFromDDL.cpp
Normal file
44
src/Databases/MySQL/tryParseTableIDFromDDL.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/CommonParsers.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Extracts the table identifier from the beginning of a DDL query.
///
/// Recognized statement prefixes:
///   CREATE [TEMPORARY] TABLE [IF NOT EXISTS], ALTER TABLE, RENAME TABLE,
///   DROP [TEMPORARY] TABLE [IF EXISTS], TRUNCATE [TABLE].
/// If the query starts with none of them, or no compound identifier follows,
/// an empty StorageID is returned. When the identifier has no database part,
/// `default_database_name` is used instead.
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
{
    Tokens tokens(query.data(), query.data() + query.size());
    IParser::Pos pos(tokens, 0);
    Expected expected;

    /// Tries to consume the keyword at the current position; returns whether it was consumed.
    const auto skip_keyword = [&](const char * keyword)
    {
        return ParserKeyword(keyword).ignore(pos, expected);
    };

    bool is_ddl = true;
    if (skip_keyword("CREATE TEMPORARY TABLE") || skip_keyword("CREATE TABLE"))
    {
        skip_keyword("IF NOT EXISTS");
    }
    else if (skip_keyword("ALTER TABLE") || skip_keyword("RENAME TABLE"))
    {
        /// No optional clause precedes the table name.
    }
    else if (skip_keyword("DROP TABLE") || skip_keyword("DROP TEMPORARY TABLE"))
    {
        skip_keyword("IF EXISTS");
    }
    else if (skip_keyword("TRUNCATE"))
    {
        skip_keyword("TABLE");
    }
    else
    {
        is_ddl = false;
    }

    if (!is_ddl)
        return StorageID::createEmpty();

    ASTPtr table;
    if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
        return StorageID::createEmpty();

    auto table_id = table->as<ASTTableIdentifier>()->getTableId();
    if (table_id.database_name.empty())
        table_id.database_name = default_database_name;

    return table_id;
}
|
||||
|
||||
}
|
11
src/Databases/MySQL/tryParseTableIDFromDDL.h
Normal file
11
src/Databases/MySQL/tryParseTableIDFromDDL.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);
|
||||
|
||||
}
|
@ -257,7 +257,6 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
|
||||
|
||||
const auto & headers_prefix = settings_config_prefix + ".headers";
|
||||
|
||||
|
||||
if (config.has(headers_prefix))
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys config_keys;
|
||||
@ -297,7 +296,10 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
|
||||
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
|
||||
|
||||
if (created_from_ddl)
|
||||
{
|
||||
context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
|
||||
context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
|
||||
}
|
||||
|
||||
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
|
||||
};
|
||||
|
@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const
|
||||
}
|
||||
|
||||
|
||||
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
|
||||
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator)
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(config_prefix, keys);
|
||||
@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
|
||||
|
||||
auto disk_config_prefix = config_prefix + "." + disk_name;
|
||||
|
||||
if (disk_validator && !disk_validator(config, disk_config_prefix))
|
||||
continue;
|
||||
|
||||
disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
|
||||
}
|
||||
if (!has_default_disk)
|
||||
|
@ -23,7 +23,8 @@ public:
|
||||
DiskSelector() = default;
|
||||
DiskSelector(const DiskSelector & from) = default;
|
||||
|
||||
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
|
||||
using DiskValidator = std::function<bool(const Poco::Util::AbstractConfiguration & config, const String & disk_config_prefix)>;
|
||||
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {});
|
||||
|
||||
DiskSelectorPtr updateFromConfig(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
|
@ -23,10 +23,6 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_)
|
||||
: TemporaryFileOnDisk(disk_, "")
|
||||
{}
|
||||
|
||||
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope)
|
||||
: TemporaryFileOnDisk(disk_)
|
||||
{
|
||||
|
@ -16,9 +16,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
class TemporaryFileOnDisk
|
||||
{
|
||||
public:
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix = "tmp");
|
||||
|
||||
~TemporaryFileOnDisk();
|
||||
|
||||
|
@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
|
||||
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
|
||||
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
|
||||
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
|
@ -152,6 +152,7 @@ struct FormatSettings
|
||||
bool trim_whitespaces = true;
|
||||
bool allow_whitespace_or_tab_as_delimiter = false;
|
||||
bool allow_variable_number_of_columns = false;
|
||||
bool use_default_on_bad_values = false;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
|
@ -19,6 +19,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr auto microsecond_multiplier = 1000000;
|
||||
static constexpr auto millisecond_multiplier = 1000;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -1377,6 +1380,36 @@ struct ToRelativeSecondNumImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
template <Int64 scale_multiplier>
|
||||
struct ToRelativeSubsecondNumImpl
|
||||
{
|
||||
static constexpr auto name = "toRelativeSubsecondNumImpl";
|
||||
|
||||
static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
|
||||
{
|
||||
static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
|
||||
if (scale == scale_multiplier)
|
||||
return t.value;
|
||||
if (scale > scale_multiplier)
|
||||
return t.value / (scale / scale_multiplier);
|
||||
return t.value * (scale_multiplier / scale);
|
||||
}
|
||||
static inline Int64 execute(UInt32 t, const DateLUTImpl &)
|
||||
{
|
||||
return t * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToYYYYMMImpl
|
||||
{
|
||||
static constexpr auto name = "toYYYYMM";
|
||||
@ -1476,25 +1509,47 @@ struct ToYYYYMMDDhhmmssImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
|
||||
{
|
||||
UInt16 millisecond;
|
||||
UInt16 microsecond;
|
||||
};
|
||||
|
||||
struct ToDateTimeComponentsImpl
|
||||
{
|
||||
static constexpr auto name = "toDateTimeComponents";
|
||||
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(t);
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
|
||||
if (t.value < 0 && components.fractional)
|
||||
{
|
||||
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
|
||||
--components.whole;
|
||||
}
|
||||
Int64 fractional = components.fractional;
|
||||
if (scale_multiplier > microsecond_multiplier)
|
||||
fractional = fractional / (scale_multiplier / microsecond_multiplier);
|
||||
else if (scale_multiplier < microsecond_multiplier)
|
||||
fractional = fractional * (microsecond_multiplier / scale_multiplier);
|
||||
|
||||
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
|
||||
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
|
||||
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(ExtendedDayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(DayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
|
@ -1112,6 +1112,11 @@ private:
|
||||
bool c0_const = isColumnConst(*c0);
|
||||
bool c1_const = isColumnConst(*c1);
|
||||
|
||||
/// This is a paranoid check to protect from a broken query analysis.
|
||||
if (c0->isNullable() != c1->isNullable())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error: columns are assumed to be of identical types, but they are different in Nullable");
|
||||
|
||||
if (c0_const && c1_const)
|
||||
{
|
||||
UInt8 res = 0;
|
||||
|
@ -79,28 +79,51 @@ namespace impl
|
||||
UInt64 key1 = 0;
|
||||
};
|
||||
|
||||
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
|
||||
struct SipHashKeyColumns
|
||||
{
|
||||
SipHashKey ret{};
|
||||
ColumnPtr key0;
|
||||
ColumnPtr key1;
|
||||
bool is_const;
|
||||
|
||||
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
|
||||
size_t size() const
|
||||
{
|
||||
assert(key0 && key1);
|
||||
assert(key0->size() == key1->size());
|
||||
return key0->size();
|
||||
}
|
||||
SipHashKey getKey(size_t i) const
|
||||
{
|
||||
if (is_const)
|
||||
i = 0;
|
||||
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
|
||||
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
|
||||
return {key0data[i], key1data[i]};
|
||||
}
|
||||
};
|
||||
|
||||
static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
|
||||
{
|
||||
const ColumnTuple * tuple = nullptr;
|
||||
const auto * column = key.column.get();
|
||||
bool is_const = false;
|
||||
if (isColumnConst(*column))
|
||||
{
|
||||
is_const = true;
|
||||
tuple = checkAndGetColumnConstData<ColumnTuple>(column);
|
||||
}
|
||||
else
|
||||
tuple = checkAndGetColumn<ColumnTuple>(column);
|
||||
if (!tuple)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
|
||||
|
||||
if (tuple->tupleSize() != 2)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
|
||||
|
||||
if (tuple->empty())
|
||||
return ret;
|
||||
|
||||
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
|
||||
ret.key0 = key0col->get64(0);
|
||||
else
|
||||
SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
|
||||
assert(ret.key0);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key0))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
|
||||
|
||||
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
|
||||
ret.key1 = key1col->get64(0);
|
||||
else
|
||||
assert(ret.key1);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key1))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
|
||||
|
||||
return ret;
|
||||
@ -329,8 +352,10 @@ struct SipHash64KeyedImpl
|
||||
static constexpr auto name = "sipHash64Keyed";
|
||||
using ReturnType = UInt64;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -371,8 +396,10 @@ struct SipHash128KeyedImpl
|
||||
static constexpr auto name = "sipHash128Keyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -398,13 +425,43 @@ struct SipHash128ReferenceImpl
|
||||
|
||||
using ReturnType = UInt128;
|
||||
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
|
||||
|
||||
static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
struct SipHash128ReferenceKeyedImpl
|
||||
{
|
||||
static constexpr auto name = "sipHash128ReferenceKeyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
|
||||
{
|
||||
return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
|
||||
}
|
||||
|
||||
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
|
||||
{
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
UInt128 tmp;
|
||||
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
|
||||
h1 = tmp;
|
||||
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
|
||||
h2 = tmp;
|
||||
#endif
|
||||
UInt128 hashes[] = {h1, h2};
|
||||
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/** Why do we need MurmurHash2?
|
||||
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
|
||||
@ -1023,7 +1080,7 @@ private:
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
template <typename Impl, bool Keyed, typename KeyType>
|
||||
template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
|
||||
class FunctionAnyHash : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -1033,9 +1090,12 @@ private:
|
||||
using ToType = typename Impl::ReturnType;
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1044,6 +1104,9 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
|
||||
if constexpr (Impl::use_int_hash_for_pods)
|
||||
{
|
||||
@ -1077,6 +1140,14 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
ToType hash;
|
||||
|
||||
@ -1107,8 +1178,15 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
@ -1116,9 +1194,12 @@ private:
|
||||
}
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1127,6 +1208,9 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
else
|
||||
@ -1143,6 +1227,14 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
|
||||
ToType hash;
|
||||
@ -1158,8 +1250,15 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
@ -1167,10 +1266,16 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0, size = column->size(); i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
StringRef bytes = column->getDataAt(i);
|
||||
const ToType hash = apply(key, bytes.data, bytes.size);
|
||||
if constexpr (first)
|
||||
@ -1181,8 +1286,11 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
|
||||
{
|
||||
const typename ColumnString::Chars & data = col_from->getChars();
|
||||
@ -1192,6 +1300,9 @@ private:
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key,
|
||||
reinterpret_cast<const char *>(&data[current_offset]),
|
||||
offsets[i] - current_offset - 1);
|
||||
@ -1212,6 +1323,9 @@ private:
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
if constexpr (first)
|
||||
vec_to[i] = hash;
|
||||
@ -1221,6 +1335,14 @@ private:
|
||||
}
|
||||
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeString<first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
String value = col_from_const->getValue<String>();
|
||||
const ToType hash = apply(key, value.data(), value.size());
|
||||
const size_t size = vec_to.size();
|
||||
@ -1228,8 +1350,15 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1237,7 +1366,7 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
|
||||
|
||||
@ -1249,13 +1378,19 @@ private:
|
||||
|
||||
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
||||
bool nested_is_first = true;
|
||||
executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
|
||||
const size_t size = offsets.size();
|
||||
|
||||
ColumnArray::Offset current_offset = 0;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
ColumnArray::Offset next_offset = offsets[i];
|
||||
|
||||
ToType hash;
|
||||
@ -1279,7 +1414,7 @@ private:
|
||||
{
|
||||
/// NOTE: here, of course, you can do without the materialization of the column.
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
executeArray<first>(key, type, full_column.get(), vec_to);
|
||||
executeArray<first>(key_cols, type, full_column.get(), vec_to);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1287,7 +1422,7 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
WhichDataType which(from_type);
|
||||
|
||||
@ -1295,40 +1430,45 @@ private:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
|
||||
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
if constexpr (Keyed)
|
||||
if ((!key_cols.is_const && key_cols.size() != vec_to.size())
|
||||
|| (key_cols.is_const && key_cols.size() != 1))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key_cols, icolumn, vec_to);
|
||||
}
|
||||
|
||||
void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
{
|
||||
/// Flattening of tuples.
|
||||
if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
|
||||
@ -1337,7 +1477,7 @@ private:
|
||||
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
|
||||
size_t tuple_size = tuple_columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
}
|
||||
else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
|
||||
{
|
||||
@ -1347,24 +1487,24 @@ private:
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
auto tmp = ColumnConst::create(tuple_columns[i], column->size());
|
||||
executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
}
|
||||
}
|
||||
else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
|
||||
{
|
||||
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
|
||||
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
}
|
||||
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
|
||||
{
|
||||
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_first)
|
||||
executeAny<true>(key, type, column, vec_to);
|
||||
executeAny<true>(key_cols, type, column, vec_to);
|
||||
else
|
||||
executeAny<false>(key, type, column, vec_to);
|
||||
executeAny<false>(key_cols, type, column, vec_to);
|
||||
}
|
||||
|
||||
is_first = false;
|
||||
@ -1395,30 +1535,33 @@ public:
|
||||
{
|
||||
auto col_to = ColumnVector<ToType>::create(input_rows_count);
|
||||
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
|
||||
/// If using a "keyed" algorithm, the first argument is the key and
|
||||
/// the data starts from the second argument.
|
||||
/// Otherwise there is no key and all arguments are interpreted as data.
|
||||
constexpr size_t first_data_argument = Keyed;
|
||||
|
||||
if (arguments.size() <= first_data_argument)
|
||||
if (input_rows_count != 0)
|
||||
{
|
||||
/// Return a fixed random-looking magic number when input is empty
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
if (!arguments.empty())
|
||||
key = Impl::parseKey(arguments[0]);
|
||||
/// If using a "keyed" algorithm, the first argument is the key and
|
||||
/// the data starts from the second argument.
|
||||
/// Otherwise there is no key and all arguments are interpreted as data.
|
||||
constexpr size_t first_data_argument = Keyed;
|
||||
|
||||
/// The function supports arbitrary number of arguments of arbitrary types.
|
||||
bool is_first_argument = true;
|
||||
for (size_t i = first_data_argument; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & col = arguments[i];
|
||||
executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
if (arguments.size() <= first_data_argument)
|
||||
{
|
||||
/// Return a fixed random-looking magic number when input is empty
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
|
||||
KeyColumnsType key_cols{};
|
||||
if constexpr (Keyed)
|
||||
if (!arguments.empty())
|
||||
key_cols = Impl::parseKeyColumns(arguments[0]);
|
||||
|
||||
/// The function supports arbitrary number of arguments of arbitrary types.
|
||||
bool is_first_argument = true;
|
||||
for (size_t i = first_data_argument; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & col = arguments[i];
|
||||
executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
|
||||
@ -1450,17 +1593,19 @@ public:
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
|
||||
{
|
||||
public:
|
||||
explicit FunctionAnyHash(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1696,7 +1841,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
|
||||
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
|
||||
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
||||
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
|
||||
#if USE_SSL
|
||||
@ -1710,8 +1855,10 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
|
||||
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
|
||||
#endif
|
||||
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
|
||||
using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
|
||||
using FunctionSipHash128ReferenceKeyed
|
||||
= FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
|
||||
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
|
||||
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
|
||||
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
|
||||
|
@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing)
|
||||
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
|
||||
.categories{"Hash"}
|
||||
});
|
||||
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
|
||||
.description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
|
||||
"instead of using a fixed key.",
|
||||
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
|
||||
.categories{"Hash"}});
|
||||
factory.registerFunction<FunctionCityHash64>();
|
||||
factory.registerFunction<FunctionFarmFingerprint64>();
|
||||
factory.registerFunction<FunctionFarmHash64>();
|
||||
|
@ -39,6 +39,9 @@ struct HasTokenImpl
|
||||
if (start_pos != nullptr)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name);
|
||||
|
||||
if (pattern.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token");
|
||||
|
||||
if (haystack_offsets.empty())
|
||||
return;
|
||||
|
||||
|
@ -7,8 +7,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** URL processing functions. See implementation in separate .cpp files.
|
||||
* All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
|
||||
/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
|
||||
* All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
|
||||
*
|
||||
* Functions for extracting parts of a URL.
|
||||
* If the URL does not contain the requested part, an empty string is returned.
|
||||
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
|
||||
}
|
||||
};
|
||||
|
||||
@ -156,7 +156,7 @@ struct CutSubstringImpl
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
|
||||
}
|
||||
};
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
*
|
||||
* Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
|
||||
* invokes Transform::execute() with either:
|
||||
@ -80,7 +80,10 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
if (t.value < 0 && components.fractional)
|
||||
--components.whole;
|
||||
|
||||
return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include "FunctionsURL.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include "FunctionsURL.h"
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include "path.h"
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include "FunctionsURL.h"
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include "path.h"
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -54,4 +54,3 @@ struct ExtractProtocol
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -174,12 +174,13 @@ public:
|
||||
{
|
||||
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
|
||||
- static_cast<Int64>(transform_x.execute(x, timezone_x));
|
||||
DateLUTImpl::DateTimeComponents a_comp;
|
||||
DateLUTImpl::DateTimeComponents b_comp;
|
||||
DateTimeComponentsWithFractionalPart a_comp;
|
||||
DateTimeComponentsWithFractionalPart b_comp;
|
||||
Int64 adjust_value;
|
||||
auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
if (x_seconds <= y_seconds)
|
||||
auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
|
||||
if (x_microseconds <= y_microseconds)
|
||||
{
|
||||
a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
@ -192,14 +193,16 @@ public:
|
||||
adjust_value = 1;
|
||||
}
|
||||
|
||||
|
||||
if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.date.month > b_comp.date.month)
|
||||
|| ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -210,8 +213,9 @@ public:
|
||||
|| ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -219,8 +223,9 @@ public:
|
||||
if ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -230,25 +235,44 @@ public:
|
||||
if ((x_day_of_week > y_day_of_week)
|
||||
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if (a_comp.time.second > b_comp.time.second)
|
||||
if ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
|
||||
{
|
||||
if (a_comp.microsecond > b_comp.microsecond)
|
||||
res += adjust_value;
|
||||
}
|
||||
return res;
|
||||
@ -373,6 +397,10 @@ public:
|
||||
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "second" || unit == "ss" || unit == "s")
|
||||
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "millisecond" || unit == "ms")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "microsecond" || unit == "us" || unit == "u")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Function {} does not support '{}' unit", getName(), unit);
|
||||
|
42
src/Functions/firstLine.cpp
Normal file
42
src/Functions/firstLine.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct FirstLine
|
||||
{
|
||||
static size_t getReserveLengthForElement() { return 16; }
|
||||
|
||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||
{
|
||||
res_data = data;
|
||||
|
||||
const Pos end = data + size;
|
||||
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
|
||||
res_size = pos - data;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameFirstLine
|
||||
{
|
||||
static constexpr auto name = "firstLine";
|
||||
};
|
||||
|
||||
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
|
||||
|
||||
REGISTER_FUNCTION(FirstLine)
|
||||
{
|
||||
factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
|
||||
.description = "Returns first line of a multi-line string.",
|
||||
.syntax = "firstLine(string)",
|
||||
.arguments = {{.name = "string", .description = "The string to process."}},
|
||||
.returned_value = {"The first line of the string or the whole string if there is no line separators."},
|
||||
.examples = {
|
||||
{.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
|
||||
{.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
|
||||
}});
|
||||
}
|
||||
}
|
@ -119,7 +119,7 @@ public:
|
||||
|
||||
if (!lhs_array->hasEqualOffsets(*rhs_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array offsets",
|
||||
"The argument 2 and argument {} of function {} have different array offsets",
|
||||
i + 1,
|
||||
getName());
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <Functions/DateTimeTransforms.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/TransformDateTime64.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
|
@ -16,19 +16,15 @@
|
||||
|
||||
#include <DataTypes/DataTypeSet.h>
|
||||
#include <DataTypes/DataTypeFunction.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnSet.h>
|
||||
|
||||
#include <Storages/StorageSet.h>
|
||||
@ -47,7 +43,6 @@
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Interpreters/ActionsVisitor.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Interpreters/Set.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
@ -61,6 +56,7 @@
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -715,7 +711,7 @@ bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
|
||||
node->as<ASTExpressionList>())
|
||||
return false;
|
||||
|
||||
/// Do not go to FROM, JOIN, UNION.
|
||||
/// Do not go to FROM, JOIN, UNION
|
||||
if (child->as<ASTTableExpression>() ||
|
||||
child->as<ASTSelectQuery>())
|
||||
return false;
|
||||
|
@ -97,6 +97,10 @@ UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const
|
||||
|
||||
for (const auto & setting : settings.allChanged())
|
||||
{
|
||||
/// We don't consider this setting because it is only for deduplication,
|
||||
/// which means we can put two inserts with different tokens in the same block safely.
|
||||
if (setting.getName() == "insert_deduplication_token")
|
||||
continue;
|
||||
siphash.update(setting.getName());
|
||||
applyVisitor(FieldVisitorHash(siphash), setting.getValue());
|
||||
}
|
||||
@ -111,9 +115,10 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
|
||||
return query_str == other.query_str && settings == other.settings;
|
||||
}
|
||||
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, const String & async_dedup_token_, MemoryTracker * user_memory_tracker_)
|
||||
: bytes(std::move(bytes_))
|
||||
, query_id(std::move(query_id_))
|
||||
, async_dedup_token(async_dedup_token_)
|
||||
, user_memory_tracker(user_memory_tracker_)
|
||||
, create_time(std::chrono::system_clock::now())
|
||||
{
|
||||
@ -227,7 +232,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
|
||||
/// to avoid buffering of huge amount of data in memory.
|
||||
|
||||
auto read_buf = getReadBufferFromASTInsertQuery(query);
|
||||
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {});
|
||||
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});
|
||||
|
||||
WriteBufferFromString write_buf(bytes);
|
||||
copyData(limit_buf, write_buf);
|
||||
@ -253,7 +258,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
|
||||
if (auto quota = query_context->getQuota())
|
||||
quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
|
||||
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), settings.insert_deduplication_token, CurrentThread::getUserMemoryTracker());
|
||||
|
||||
InsertQuery key{query, settings};
|
||||
InsertDataPtr data_to_process;
|
||||
@ -517,7 +522,7 @@ try
|
||||
|
||||
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
|
||||
std::unique_ptr<ReadBuffer> last_buffer;
|
||||
auto chunk_info = std::make_shared<ChunkOffsets>();
|
||||
auto chunk_info = std::make_shared<AsyncInsertInfo>();
|
||||
for (const auto & entry : data->entries)
|
||||
{
|
||||
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
|
||||
@ -526,6 +531,7 @@ try
|
||||
size_t num_rows = executor.execute(*buffer);
|
||||
total_rows += num_rows;
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
|
||||
/// Keep buffer, because it still can be used
|
||||
/// in destructor, while resetting buffer at next iteration.
|
||||
|
@ -69,10 +69,11 @@ private:
|
||||
public:
|
||||
String bytes;
|
||||
const String query_id;
|
||||
const String async_dedup_token;
|
||||
MemoryTracker * const user_memory_tracker;
|
||||
const std::chrono::time_point<std::chrono::system_clock> create_time;
|
||||
|
||||
Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_);
|
||||
Entry(String && bytes_, String && query_id_, const String & async_dedup_token, MemoryTracker * user_memory_tracker_);
|
||||
|
||||
void finish(std::exception_ptr exception_ = nullptr);
|
||||
std::future<void> getFuture() { return promise.get_future(); }
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/SipHash.h>
|
||||
@ -115,6 +116,13 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
|
||||
return transformed_ast;
|
||||
}
|
||||
|
||||
/// Formats `ast` into a string through formatAST() with hilite disabled,
/// one-line output enabled and secrets not shown.
String queryStringFromAST(ASTPtr ast)
{
    WriteBufferFromOwnString formatted_query;
    formatAST(*ast, formatted_query, /*hilite*/ false, /*one_line*/ true, /*show_secrets*/ false);
    return formatted_query.str();
}
|
||||
|
||||
}
|
||||
|
||||
QueryCache::Key::Key(
|
||||
@ -129,6 +137,7 @@ QueryCache::Key::Key(
|
||||
, is_shared(is_shared_)
|
||||
, expires_at(expires_at_)
|
||||
, is_compressed(is_compressed_)
|
||||
, query_string(queryStringFromAST(ast_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -142,15 +151,6 @@ bool QueryCache::Key::operator==(const Key & other) const
|
||||
return ast->getTreeHash() == other.ast->getTreeHash();
|
||||
}
|
||||
|
||||
String QueryCache::Key::queryStringFromAst() const
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
IAST::FormatSettings format_settings(buf, /*one_line*/ true);
|
||||
format_settings.show_secrets = false;
|
||||
ast->format(format_settings);
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
size_t QueryCache::KeyHasher::operator()(const Key & key) const
|
||||
{
|
||||
SipHash hash;
|
||||
@ -191,7 +191,7 @@ QueryCache::Writer::Writer(
|
||||
if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
|
||||
{
|
||||
skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,14 +263,14 @@ void QueryCache::Writer::finalizeWrite()
|
||||
|
||||
if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - query_start_time) < min_query_runtime)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key))
|
||||
{
|
||||
/// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -353,7 +353,7 @@ void QueryCache::Writer::finalizeWrite()
|
||||
|
||||
if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows))
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -388,7 +388,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
|
||||
|
||||
if (!entry.has_value())
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -397,13 +397,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
|
||||
|
||||
if (!entry_key.is_shared && entry_key.user_name != key.user_name)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsStale()(entry_key))
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -441,7 +441,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar
|
||||
buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes);
|
||||
}
|
||||
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst());
|
||||
LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string);
|
||||
}
|
||||
|
||||
bool QueryCache::Reader::hasCacheEntryForKey() const
|
||||
|
@ -30,7 +30,7 @@ public:
|
||||
/// ----------------------------------------------------
|
||||
/// The actual key (data which gets hashed):
|
||||
|
||||
/// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select)
|
||||
/// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select).
|
||||
const ASTPtr ast;
|
||||
|
||||
/// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the
|
||||
@ -58,6 +58,11 @@ public:
|
||||
/// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case)
|
||||
const bool is_compressed;
|
||||
|
||||
/// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the
|
||||
/// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in
|
||||
/// the SYSTEM.QUERY_CACHE.
|
||||
const String query_string;
|
||||
|
||||
/// Ctor to construct a Key for writing into query cache.
|
||||
Key(ASTPtr ast_,
|
||||
Block header_,
|
||||
@ -69,7 +74,6 @@ public:
|
||||
Key(ASTPtr ast_, const String & user_name_);
|
||||
|
||||
bool operator==(const Key & other) const;
|
||||
String queryStringFromAst() const;
|
||||
};
|
||||
|
||||
struct Entry
|
||||
|
@ -98,6 +98,7 @@
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/EnumReflection.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Common/HTTPHeaderFilter.h>
|
||||
#include <Interpreters/AsynchronousInsertQueue.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/JIT/CompiledExpressionCache.h>
|
||||
@ -327,9 +328,10 @@ struct ContextSharedPart : boost::noncopyable
|
||||
OrdinaryBackgroundExecutorPtr fetch_executor;
|
||||
OrdinaryBackgroundExecutorPtr common_executor;
|
||||
|
||||
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
|
||||
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
|
||||
HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml
|
||||
|
||||
std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
|
||||
std::optional<TraceCollector> trace_collector; /// Thread collecting traces from threads executing queries
|
||||
|
||||
/// Clusters for distributed tables
|
||||
/// Initialized on demand (on distributed storages initialization) since Settings should be initialized
|
||||
@ -2963,6 +2965,16 @@ const RemoteHostFilter & Context::getRemoteHostFilter() const
|
||||
return shared->remote_host_filter;
|
||||
}
|
||||
|
||||
/// Loads the values of the shared HTTP header filter from `config`.
void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config)
{
    auto & header_filter = shared->http_header_filter;
    header_filter.setValuesFromConfig(config);
}
|
||||
|
||||
/// Gives read-only access to the HTTP header filter kept in the shared context part.
const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const
{
    const auto & header_filter = shared->http_header_filter;
    return header_filter;
}
|
||||
|
||||
UInt16 Context::getTCPPort() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/isLocalAddress.h>
|
||||
#include <Common/MultiVersion.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Common/HTTPHeaderFilter.h>
|
||||
#include <Common/ThreadPool_fwd.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
@ -766,6 +767,10 @@ public:
|
||||
void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config);
|
||||
const RemoteHostFilter & getRemoteHostFilter() const;
|
||||
|
||||
/// Storage of forbidden HTTP headers from config.xml
|
||||
void setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config);
|
||||
const HTTPHeaderFilter & getHTTPHeaderFilter() const;
|
||||
|
||||
/// The port that the server listens for executing SQL queries.
|
||||
UInt16 getTCPPort() const;
|
||||
|
||||
|
@ -15,6 +15,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TABLE_IS_READ_ONLY;
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
|
||||
@ -23,6 +24,21 @@ BlockIO InterpreterCreateIndexQuery::execute()
|
||||
auto current_context = getContext();
|
||||
const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();
|
||||
|
||||
// Noop if allow_create_index_without_type = true. throw otherwise
|
||||
if (!create_index.index_decl->as<ASTIndexDeclaration>()->type)
|
||||
{
|
||||
if (!current_context->getSettingsRef().allow_create_index_without_type)
|
||||
{
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden."
|
||||
" SET allow_create_index_without_type=1 to ignore this statements.");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Nothing to do
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
AccessRightsElements required_access;
|
||||
required_access.emplace_back(AccessType::ALTER_ADD_INDEX, create_index.getDatabase(), create_index.getTable());
|
||||
|
||||
|
@ -118,7 +118,10 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e
|
||||
if (table_element->table_join && isLeft(table_element->table_join->as<ASTTableJoin>()->kind))
|
||||
continue; /// Skip right table optimization
|
||||
|
||||
if (table_element->table_join && isFull(table_element->table_join->as<ASTTableJoin>()->kind))
|
||||
if (table_element->table_join && (
|
||||
isFull(table_element->table_join->as<ASTTableJoin>()->kind)
|
||||
|| table_element->table_join->as<ASTTableJoin>()->strictness == JoinStrictness::Asof
|
||||
|| table_element->table_join->as<ASTTableJoin>()->strictness == JoinStrictness::Anti))
|
||||
break; /// Skip left and right table optimization
|
||||
|
||||
is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos],
|
||||
|
@ -1,27 +1,24 @@
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/Block.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/FunctionNameNormalizer.h>
|
||||
#include <Interpreters/ReplaceQueryParameterVisitor.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -94,18 +91,18 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
|
||||
if (!result_column)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
|
||||
"Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument "
|
||||
"is not a constant expression (result column not found): {}", result_name);
|
||||
|
||||
if (result_column->empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error: empty result column after evaluation "
|
||||
"of constant expression for IN, VALUES or LIMIT or aggregate function parameter");
|
||||
"of constant expression for IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument");
|
||||
|
||||
/// Expressions like rand() or now() are not constant
|
||||
if (!isColumnConst(*result_column))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
|
||||
"Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument "
|
||||
"is not a constant expression (result column is not const): {}", result_name);
|
||||
|
||||
return std::make_pair((*result_column)[0], result_type);
|
||||
|
@ -56,8 +56,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma
|
||||
|
||||
formatOnCluster(settings);
|
||||
|
||||
if (!cluster.empty())
|
||||
settings.ostr << " ";
|
||||
settings.ostr << " ";
|
||||
|
||||
index_decl->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
@ -13,8 +13,8 @@ ASTPtr ASTIndexDeclaration::clone() const
|
||||
auto res = std::make_shared<ASTIndexDeclaration>();
|
||||
|
||||
res->name = name;
|
||||
res->granularity = granularity;
|
||||
|
||||
if (granularity)
|
||||
res->granularity = granularity;
|
||||
if (expr)
|
||||
res->set(res->expr, expr->clone());
|
||||
if (type)
|
||||
@ -25,23 +25,37 @@ ASTPtr ASTIndexDeclaration::clone() const
|
||||
|
||||
void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
if (part_of_create_index_query)
|
||||
if (expr)
|
||||
{
|
||||
s.ostr << "(";
|
||||
expr->formatImpl(s, state, frame);
|
||||
s.ostr << ")";
|
||||
}
|
||||
else
|
||||
{
|
||||
s.ostr << backQuoteIfNeed(name);
|
||||
s.ostr << " ";
|
||||
expr->formatImpl(s, state, frame);
|
||||
if (part_of_create_index_query)
|
||||
{
|
||||
if (expr->as<ASTExpressionList>())
|
||||
{
|
||||
s.ostr << "(";
|
||||
expr->formatImpl(s, state, frame);
|
||||
s.ostr << ")";
|
||||
}
|
||||
else
|
||||
expr->formatImpl(s, state, frame);
|
||||
}
|
||||
else
|
||||
{
|
||||
s.ostr << backQuoteIfNeed(name);
|
||||
s.ostr << " ";
|
||||
expr->formatImpl(s, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
||||
type->formatImpl(s, state, frame);
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
|
||||
s.ostr << granularity;
|
||||
if (type)
|
||||
{
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
||||
type->formatImpl(s, state, frame);
|
||||
}
|
||||
if (granularity)
|
||||
{
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
|
||||
s.ostr << granularity;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -64,4 +64,14 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma
|
||||
}
|
||||
}
|
||||
|
||||
void ASTSetQuery::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
Hash hash = getTreeHash();
|
||||
|
||||
writeCString("__settings_", ostr);
|
||||
writeText(hash.first, ostr);
|
||||
ostr.write('_');
|
||||
writeText(hash.second, ostr);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -37,6 +37,9 @@ public:
|
||||
void updateTreeHashImpl(SipHash & hash_state) const override;
|
||||
|
||||
QueryKind getQueryKind() const override { return QueryKind::Set; }
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const override { return appendColumnName(ostr); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -17,24 +17,36 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
|
||||
{
|
||||
ParserKeyword s_type("TYPE");
|
||||
ParserKeyword s_granularity("GRANULARITY");
|
||||
|
||||
ParserToken open(TokenType::OpeningRoundBracket);
|
||||
ParserToken close(TokenType::ClosingRoundBracket);
|
||||
ParserOrderByExpressionList order_list;
|
||||
ParserDataType data_type_p;
|
||||
ParserExpression expression_p;
|
||||
ParserUnsignedInteger granularity_p;
|
||||
|
||||
ASTPtr expr;
|
||||
ASTPtr order;
|
||||
ASTPtr type;
|
||||
ASTPtr granularity;
|
||||
|
||||
/// Skip name parser for SQL-standard CREATE INDEX
|
||||
if (!expression_p.parse(pos, expr, expected))
|
||||
return false;
|
||||
if (expression_p.parse(pos, expr, expected))
|
||||
{
|
||||
}
|
||||
else if (open.ignore(pos, expected))
|
||||
{
|
||||
if (!order_list.parse(pos, order, expected))
|
||||
return false;
|
||||
|
||||
if (!s_type.ignore(pos, expected))
|
||||
return false;
|
||||
if (!close.ignore(pos, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!data_type_p.parse(pos, type, expected))
|
||||
return false;
|
||||
if (s_type.ignore(pos, expected))
|
||||
{
|
||||
if (!data_type_p.parse(pos, type, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s_granularity.ignore(pos, expected))
|
||||
{
|
||||
@ -45,13 +57,14 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
|
||||
auto index = std::make_shared<ASTIndexDeclaration>();
|
||||
index->part_of_create_index_query = true;
|
||||
index->set(index->expr, expr);
|
||||
index->set(index->type, type);
|
||||
if (type)
|
||||
index->set(index->type, type);
|
||||
|
||||
if (granularity)
|
||||
index->granularity = granularity->as<ASTLiteral &>().value.safeGet<UInt64>();
|
||||
else
|
||||
{
|
||||
if (index->type->name == "annoy")
|
||||
if (index->type && index->type->name == "annoy")
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
|
||||
else
|
||||
index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
|
||||
|
@ -4,9 +4,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line)
|
||||
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets)
|
||||
{
|
||||
IAST::FormatSettings settings(buf, one_line);
|
||||
IAST::FormatSettings settings(buf, one_line, show_secrets);
|
||||
settings.hilite = hilite;
|
||||
|
||||
ast.format(settings);
|
||||
|
@ -11,7 +11,7 @@ class WriteBuffer;
|
||||
/** Takes a syntax tree and turns it back into text.
|
||||
* In case of INSERT query, the data will be missing.
|
||||
*/
|
||||
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false);
|
||||
void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true);
|
||||
|
||||
String serializeAST(const IAST & ast, bool one_line = true);
|
||||
|
||||
|
@ -114,16 +114,20 @@ private:
|
||||
|
||||
using Chunks = std::vector<Chunk>;
|
||||
|
||||
/// ChunkOffsets marks offsets of different sub-chunks, which will be used by async inserts.
|
||||
class ChunkOffsets : public ChunkInfo
|
||||
/// AsyncInsert needs two kinds of information:
|
||||
/// - offsets of different sub-chunks
|
||||
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
||||
class AsyncInsertInfo : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
ChunkOffsets() = default;
|
||||
explicit ChunkOffsets(const std::vector<size_t> & offsets_) : offsets(offsets_) {}
|
||||
AsyncInsertInfo() = default;
|
||||
explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}
|
||||
|
||||
std::vector<size_t> offsets;
|
||||
std::vector<String> tokens;
|
||||
};
|
||||
|
||||
using ChunkOffsetsPtr = std::shared_ptr<ChunkOffsets>;
|
||||
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
||||
|
||||
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
||||
class ChunkMissingValues : public ChunkInfo
|
||||
|
@ -75,7 +75,7 @@ public:
|
||||
{
|
||||
if (!allow_missing_columns)
|
||||
throw Exception(
|
||||
ErrorCodes::THERE_IS_NO_COLUMN, "Not found field({}) in arrow schema:{}.", named_col.name, schema.ToString());
|
||||
ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the following Arrow schema:\n{}\n", named_col.name, schema.ToString());
|
||||
else
|
||||
continue;
|
||||
}
|
||||
@ -168,4 +168,3 @@ private:
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -52,6 +52,8 @@
|
||||
#include <Poco/Buffer.h>
|
||||
#include <Poco/JSON/Object.h>
|
||||
#include <Poco/JSON/Parser.h>
|
||||
#include <Poco/Net/HTTPBasicCredentials.h>
|
||||
#include <Poco/Net/HTTPCredentials.h>
|
||||
#include <Poco/Net/HTTPRequest.h>
|
||||
#include <Poco/Net/HTTPResponse.h>
|
||||
#include <Poco/URI.h>
|
||||
@ -934,6 +936,27 @@ private:
|
||||
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
|
||||
request.setHost(url.getHost());
|
||||
|
||||
if (!url.getUserInfo().empty())
|
||||
{
|
||||
Poco::Net::HTTPCredentials http_credentials;
|
||||
Poco::Net::HTTPBasicCredentials http_basic_credentials;
|
||||
|
||||
http_credentials.fromUserInfo(url.getUserInfo());
|
||||
|
||||
std::string decoded_username;
|
||||
Poco::URI::decode(http_credentials.getUsername(), decoded_username);
|
||||
http_basic_credentials.setUsername(decoded_username);
|
||||
|
||||
if (!http_credentials.getPassword().empty())
|
||||
{
|
||||
std::string decoded_password;
|
||||
Poco::URI::decode(http_credentials.getPassword(), decoded_password);
|
||||
http_basic_credentials.setPassword(decoded_password);
|
||||
}
|
||||
|
||||
http_basic_credentials.authenticate(request);
|
||||
}
|
||||
|
||||
auto session = makePooledHTTPSession(url, timeouts, 1);
|
||||
session->sendRequest(request);
|
||||
|
||||
|
@ -13,7 +13,8 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_SKIP_UNKNOWN_FIELD;
|
||||
}
|
||||
|
||||
BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
|
||||
template <bool with_defaults>
|
||||
BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
|
||||
: RowInputFormatWithNamesAndTypes(
|
||||
header,
|
||||
in_,
|
||||
@ -22,16 +23,17 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & heade
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
std::make_unique<BinaryFormatReader>(in_, format_settings_))
|
||||
std::make_unique<BinaryFormatReader<with_defaults>>(in_, format_settings_))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
|
||||
template <bool with_defaults>
|
||||
BinaryFormatReader<with_defaults>::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_)
|
||||
{
|
||||
}
|
||||
|
||||
std::vector<String> BinaryFormatReader::readHeaderRow()
|
||||
template <bool with_defaults>
|
||||
std::vector<String> BinaryFormatReader<with_defaults>::readHeaderRow()
|
||||
{
|
||||
std::vector<String> fields;
|
||||
String field;
|
||||
@ -43,13 +45,15 @@ std::vector<String> BinaryFormatReader::readHeaderRow()
|
||||
return fields;
|
||||
}
|
||||
|
||||
std::vector<String> BinaryFormatReader::readNames()
|
||||
template <bool with_defaults>
|
||||
std::vector<String> BinaryFormatReader<with_defaults>::readNames()
|
||||
{
|
||||
readVarUInt(read_columns, *in);
|
||||
return readHeaderRow();
|
||||
}
|
||||
|
||||
std::vector<String> BinaryFormatReader::readTypes()
|
||||
template <bool with_defaults>
|
||||
std::vector<String> BinaryFormatReader<with_defaults>::readTypes()
|
||||
{
|
||||
auto types = readHeaderRow();
|
||||
for (const auto & type_name : types)
|
||||
@ -57,26 +61,40 @@ std::vector<String> BinaryFormatReader::readTypes()
|
||||
return types;
|
||||
}
|
||||
|
||||
bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
|
||||
template <bool with_defaults>
|
||||
bool BinaryFormatReader<with_defaults>::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/)
|
||||
{
|
||||
if constexpr (with_defaults)
|
||||
{
|
||||
UInt8 is_default;
|
||||
readBinary(is_default, *in);
|
||||
if (is_default)
|
||||
{
|
||||
column.insertDefault();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
serialization->deserializeBinary(column, *in, format_settings);
|
||||
return true;
|
||||
}
|
||||
|
||||
void BinaryFormatReader::skipHeaderRow()
|
||||
template <bool with_defaults>
|
||||
void BinaryFormatReader<with_defaults>::skipHeaderRow()
|
||||
{
|
||||
String tmp;
|
||||
for (size_t i = 0; i < read_columns; ++i)
|
||||
readStringBinary(tmp, *in);
|
||||
}
|
||||
|
||||
void BinaryFormatReader::skipNames()
|
||||
template <bool with_defaults>
|
||||
void BinaryFormatReader<with_defaults>::skipNames()
|
||||
{
|
||||
readVarUInt(read_columns, *in);
|
||||
skipHeaderRow();
|
||||
}
|
||||
|
||||
void BinaryFormatReader::skipTypes()
|
||||
template <bool with_defaults>
|
||||
void BinaryFormatReader<with_defaults>::skipTypes()
|
||||
{
|
||||
if (read_columns == 0)
|
||||
{
|
||||
@ -87,7 +105,8 @@ void BinaryFormatReader::skipTypes()
|
||||
skipHeaderRow();
|
||||
}
|
||||
|
||||
void BinaryFormatReader::skipField(size_t file_column)
|
||||
template <bool with_defaults>
|
||||
void BinaryFormatReader<with_defaults>::skipField(size_t file_column)
|
||||
{
|
||||
if (file_column >= read_data_types.size())
|
||||
throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD,
|
||||
@ -111,12 +130,21 @@ void registerInputFormatRowBinary(FormatFactory & factory)
|
||||
const IRowInputFormat::Params & params,
|
||||
const FormatSettings & settings)
|
||||
{
|
||||
return std::make_shared<BinaryRowInputFormat>(buf, sample, params, with_names, with_types, settings);
|
||||
return std::make_shared<BinaryRowInputFormat<false>>(buf, sample, params, with_names, with_types, settings);
|
||||
});
|
||||
};
|
||||
|
||||
registerWithNamesAndTypes("RowBinary", register_func);
|
||||
factory.registerFileExtension("bin", "RowBinary");
|
||||
|
||||
factory.registerInputFormat("RowBinaryWithDefaults", [](
|
||||
ReadBuffer & buf,
|
||||
const Block & sample,
|
||||
const IRowInputFormat::Params & params,
|
||||
const FormatSettings & settings)
|
||||
{
|
||||
return std::make_shared<BinaryRowInputFormat<true>>(buf, sample, params, false, false, settings);
|
||||
});
|
||||
}
|
||||
|
||||
void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
|
||||
@ -125,6 +153,8 @@ void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
|
||||
{
|
||||
return std::make_shared<BinaryWithNamesAndTypesSchemaReader>(buf, settings);
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -12,6 +12,7 @@ class ReadBuffer;
|
||||
|
||||
/** A stream for inputting data in a binary line-by-line format.
|
||||
*/
|
||||
template <bool with_defaults = false>
|
||||
class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
|
||||
{
|
||||
public:
|
||||
@ -25,6 +26,7 @@ public:
|
||||
std::string getDiagnosticInfo() override { return {}; }
|
||||
};
|
||||
|
||||
template <bool with_defaults = false>
|
||||
class BinaryFormatReader final : public FormatWithNamesAndTypesReader
|
||||
{
|
||||
public:
|
||||
@ -54,7 +56,7 @@ public:
|
||||
BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
|
||||
|
||||
private:
|
||||
BinaryFormatReader reader;
|
||||
BinaryFormatReader<false> reader;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
@ -315,17 +316,54 @@ bool CSVFormatReader::readField(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (format_settings.csv.use_default_on_bad_values)
|
||||
return readFieldOrDefault(column, type, serialization);
|
||||
return readFieldImpl(*buf, column, type, serialization);
|
||||
}
|
||||
|
||||
bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization)
|
||||
{
|
||||
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
|
||||
{
|
||||
/// If value is null but type is not nullable then use default value instead.
|
||||
return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
|
||||
return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization);
|
||||
}
|
||||
|
||||
/// Read the column normally.
|
||||
serialization->deserializeTextCSV(column, *buf, format_settings);
|
||||
serialization->deserializeTextCSV(column, istr, format_settings);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization)
|
||||
{
|
||||
String field;
|
||||
readCSVField(field, *buf, format_settings.csv);
|
||||
ReadBufferFromString tmp_buf(field);
|
||||
bool is_bad_value = false;
|
||||
bool res = false;
|
||||
|
||||
size_t col_size = column.size();
|
||||
try
|
||||
{
|
||||
res = readFieldImpl(tmp_buf, column, type, serialization);
|
||||
/// Check if we parsed the whole field successfully.
|
||||
if (!field.empty() && !tmp_buf.eof())
|
||||
is_bad_value = true;
|
||||
}
|
||||
catch (const Exception &)
|
||||
{
|
||||
is_bad_value = true;
|
||||
}
|
||||
|
||||
if (!is_bad_value)
|
||||
return res;
|
||||
|
||||
if (column.size() == col_size + 1)
|
||||
column.popBack(1);
|
||||
column.insertDefault();
|
||||
return false;
|
||||
}
|
||||
|
||||
void CSVFormatReader::skipPrefixBeforeHeader()
|
||||
{
|
||||
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)
|
||||
|
@ -89,6 +89,8 @@ public:
|
||||
void setReadBuffer(ReadBuffer & in_) override;
|
||||
|
||||
FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; }
|
||||
bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization);
|
||||
bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization);
|
||||
|
||||
protected:
|
||||
PeekableReadBuffer * buf;
|
||||
|
@ -111,7 +111,7 @@ void optimizePrimaryKeyCondition(const Stack & stack);
|
||||
void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes);
|
||||
void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
|
||||
void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
|
||||
bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
|
||||
bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections);
|
||||
bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes);
|
||||
bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
|
||||
|
||||
|
@ -19,6 +19,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const
|
||||
settings.remove_redundant_distinct = from.query_plan_remove_redundant_distinct;
|
||||
settings.optimize_projection = from.optimize_use_projections && from.query_plan_optimize_projection;
|
||||
settings.force_use_projection = settings.optimize_projection && from.force_optimize_projection;
|
||||
settings.optimize_use_implicit_projections = settings.optimize_projection && from.optimize_use_implicit_projections;
|
||||
return settings;
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@ struct QueryPlanOptimizationSettings
|
||||
/// If reading from projection can be applied
|
||||
bool optimize_projection = false;
|
||||
bool force_use_projection = false;
|
||||
bool optimize_use_implicit_projections = false;
|
||||
|
||||
static QueryPlanOptimizationSettings fromSettings(const Settings & from);
|
||||
static QueryPlanOptimizationSettings fromContext(ContextPtr from);
|
||||
|
@ -126,7 +126,8 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
|
||||
optimizeReadInOrder(*frame.node, nodes);
|
||||
|
||||
if (optimization_settings.optimize_projection)
|
||||
num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes);
|
||||
num_applied_projection
|
||||
+= optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections);
|
||||
|
||||
if (optimization_settings.aggregation_in_order)
|
||||
optimizeAggregationInOrder(*frame.node, nodes);
|
||||
|
@ -433,7 +433,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
|
||||
QueryPlan::Node & node,
|
||||
AggregatingStep & aggregating,
|
||||
ReadFromMergeTree & reading,
|
||||
const std::shared_ptr<PartitionIdToMaxBlock> & max_added_blocks)
|
||||
const std::shared_ptr<PartitionIdToMaxBlock> & max_added_blocks,
|
||||
bool allow_implicit_projections)
|
||||
{
|
||||
const auto & keys = aggregating.getParams().keys;
|
||||
const auto & aggregates = aggregating.getParams().aggregates;
|
||||
@ -453,7 +454,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
|
||||
if (projection.type == ProjectionDescription::Type::Aggregate)
|
||||
agg_projections.push_back(&projection);
|
||||
|
||||
bool can_use_minmax_projection = metadata->minmax_count_projection && !reading.getMergeTreeData().has_lightweight_delete_parts.load();
|
||||
bool can_use_minmax_projection = allow_implicit_projections && metadata->minmax_count_projection
|
||||
&& !reading.getMergeTreeData().has_lightweight_delete_parts.load();
|
||||
|
||||
if (!can_use_minmax_projection && agg_projections.empty())
|
||||
return candidates;
|
||||
@ -543,7 +545,7 @@ static QueryPlan::Node * findReadingStep(QueryPlan::Node & node)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
|
||||
bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections)
|
||||
{
|
||||
if (node.children.size() != 1)
|
||||
return false;
|
||||
@ -568,7 +570,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &
|
||||
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_added_blocks = getMaxAddedBlocks(reading);
|
||||
|
||||
auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks);
|
||||
auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections);
|
||||
|
||||
AggregateProjectionCandidate * best_candidate = nullptr;
|
||||
if (candidates.minmax_projection)
|
||||
|
@ -64,23 +64,131 @@ namespace ErrorCodes
|
||||
}
|
||||
namespace
|
||||
{
|
||||
/// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency.
|
||||
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls,
|
||||
const HDFSFSPtr & fs,
|
||||
const String & for_match);
|
||||
|
||||
/*
|
||||
* When `{...}` has any `/`s, it must be processed in a different way:
|
||||
* Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...},
|
||||
* LSWithFoldedRegexpMatching is in charge from now on.
|
||||
* It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob.
|
||||
* Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob.
|
||||
* StorageFile.cpp has the same logic.
|
||||
*/
|
||||
std::vector<StorageHDFS::PathWithInfo> LSWithFoldedRegexpMatching(const String & path_for_ls,
|
||||
const HDFSFSPtr & fs,
|
||||
const String & processed_suffix,
|
||||
const String & suffix_with_globs,
|
||||
re2::RE2 & matcher,
|
||||
const size_t max_depth,
|
||||
const size_t next_slash_after_glob_pos)
|
||||
{
|
||||
/// We don't need to go all the way in every directory if max_depth is reached
|
||||
/// as it is upper limit of depth by simply counting `/`s in curly braces
|
||||
if (!max_depth)
|
||||
return {};
|
||||
|
||||
HDFSFileInfo ls;
|
||||
ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length);
|
||||
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
|
||||
{
|
||||
// ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno.
|
||||
throw Exception(
|
||||
ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError()));
|
||||
}
|
||||
|
||||
std::vector<StorageHDFS::PathWithInfo> result;
|
||||
|
||||
if (!ls.file_info && ls.length > 0)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
|
||||
|
||||
for (int i = 0; i < ls.length; ++i)
|
||||
{
|
||||
const String full_path = String(ls.file_info[i].mName);
|
||||
const size_t last_slash = full_path.rfind('/');
|
||||
const String dir_or_file_name = full_path.substr(last_slash);
|
||||
const bool is_directory = ls.file_info[i].mKind == 'D';
|
||||
|
||||
if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
|
||||
{
|
||||
if (next_slash_after_glob_pos == std::string::npos)
|
||||
{
|
||||
result.emplace_back(
|
||||
String(ls.file_info[i].mName),
|
||||
StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)});
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(
|
||||
fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos));
|
||||
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
|
||||
}
|
||||
}
|
||||
else if (is_directory)
|
||||
{
|
||||
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithFoldedRegexpMatching(
|
||||
fs::path(full_path), fs, processed_suffix + dir_or_file_name,
|
||||
suffix_with_globs, matcher, max_depth - 1, next_slash_after_glob_pos);
|
||||
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Recursive directory listing with matched paths as a result.
|
||||
* Have the same method in StorageFile.
|
||||
*/
|
||||
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
|
||||
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(
|
||||
const String & path_for_ls,
|
||||
const HDFSFSPtr & fs,
|
||||
const String & for_match)
|
||||
{
|
||||
const size_t first_glob = for_match.find_first_of("*?{");
|
||||
const size_t first_glob_pos = for_match.find_first_of("*?{");
|
||||
const bool has_glob = first_glob_pos != std::string::npos;
|
||||
|
||||
const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
|
||||
const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
|
||||
const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/'
|
||||
const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
|
||||
|
||||
const size_t next_slash = suffix_with_globs.find('/', 1);
|
||||
re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
|
||||
size_t slashes_in_glob = 0;
|
||||
const size_t next_slash_after_glob_pos = [&]()
|
||||
{
|
||||
if (!has_glob)
|
||||
return suffix_with_globs.find('/', 1);
|
||||
|
||||
size_t in_curly = 0;
|
||||
for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
|
||||
{
|
||||
if (*it == '{')
|
||||
++in_curly;
|
||||
else if (*it == '/')
|
||||
{
|
||||
if (in_curly)
|
||||
++slashes_in_glob;
|
||||
else
|
||||
return size_t(std::distance(suffix_with_globs.begin(), it));
|
||||
}
|
||||
else if (*it == '}')
|
||||
--in_curly;
|
||||
}
|
||||
return std::string::npos;
|
||||
}();
|
||||
|
||||
const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
|
||||
|
||||
re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob));
|
||||
if (!matcher.ok())
|
||||
throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
"Cannot compile regex from glob ({}): {}", for_match, matcher.error());
|
||||
|
||||
if (slashes_in_glob)
|
||||
{
|
||||
return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs,
|
||||
matcher, slashes_in_glob, next_slash_after_glob_pos);
|
||||
}
|
||||
|
||||
HDFSFileInfo ls;
|
||||
ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
|
||||
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
|
||||
@ -97,7 +205,7 @@ namespace
|
||||
const String full_path = String(ls.file_info[i].mName);
|
||||
const size_t last_slash = full_path.rfind('/');
|
||||
const String file_name = full_path.substr(last_slash);
|
||||
const bool looking_for_directory = next_slash != std::string::npos;
|
||||
const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
|
||||
const bool is_directory = ls.file_info[i].mKind == 'D';
|
||||
/// Condition with type of current file_info means what kind of path is it in current iteration of ls
|
||||
if (!is_directory && !looking_for_directory)
|
||||
@ -111,7 +219,7 @@ namespace
|
||||
{
|
||||
if (re2::RE2::FullMatch(file_name, matcher))
|
||||
{
|
||||
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
|
||||
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos));
|
||||
/// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
|
||||
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user