mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge branch 'master' into no-keep-context-lock-while-calculating-access
This commit is contained in:
commit
668062dc29
216
.github/workflows/master.yml
vendored
216
.github/workflows/master.yml
vendored
@ -2870,6 +2870,216 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan0:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=0
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan1:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=1
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan2:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=2
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan3:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=3
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan4:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=4
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan5:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=5
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsTsan0:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
@ -3963,6 +4173,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
6
.github/workflows/pull_request.yml
vendored
6
.github/workflows/pull_request.yml
vendored
@ -5099,6 +5099,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
@ -4,6 +4,8 @@ services:
|
||||
kafka_zookeeper:
|
||||
image: zookeeper:3.4.9
|
||||
hostname: kafka_zookeeper
|
||||
ports:
|
||||
- 2181:2181
|
||||
environment:
|
||||
ZOO_MY_ID: 1
|
||||
ZOO_PORT: 2181
|
||||
@ -15,15 +17,14 @@ services:
|
||||
image: confluentinc/cp-kafka:5.2.0
|
||||
hostname: kafka1
|
||||
ports:
|
||||
- ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081}
|
||||
- ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT}
|
||||
environment:
|
||||
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092
|
||||
KAFKA_ADVERTISED_HOST_NAME: kafka1
|
||||
KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
|
||||
KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
depends_on:
|
||||
@ -35,13 +36,38 @@ services:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
hostname: schema-registry
|
||||
ports:
|
||||
- ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313}
|
||||
- ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT}
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT}
|
||||
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
restart: always
|
||||
security_opt:
|
||||
- label:disable
|
||||
|
||||
schema-registry-auth:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
hostname: schema-registry-auth
|
||||
ports:
|
||||
- ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth
|
||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user
|
||||
SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar
|
||||
SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf"
|
||||
SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth
|
||||
volumes:
|
||||
- ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
restart: always
|
||||
security_opt:
|
||||
- label:disable
|
||||
|
@ -76,6 +76,7 @@ The supported formats are:
|
||||
| [RowBinary](#rowbinary) | ✔ | ✔ |
|
||||
| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
|
||||
| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ |
|
||||
| [Native](#native) | ✔ | ✔ |
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
@ -472,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
|
||||
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
|
||||
@ -1515,6 +1517,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
|
||||
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
|
||||
:::
|
||||
|
||||
## RowBinaryWithDefaults {#rowbinarywithdefaults}
|
||||
|
||||
Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if default value should be used.
|
||||
|
||||
Examples:
|
||||
|
||||
```sql
|
||||
:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
|
||||
|
||||
┌──x─┬─y─┐
|
||||
│ 42 │ 1 │
|
||||
└────┴───┘
|
||||
```
|
||||
|
||||
For column `x` there is only one byte `01` that indicates that default value should be used and no other data after this byte is provided.
|
||||
For column `y` data starts with byte `00` that indicates that column has actual value that should be read from the subsequent data `01000000`.
|
||||
|
||||
## RowBinary format settings {#row-binary-format-settings}
|
||||
|
||||
- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.
|
||||
|
@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
|
||||
```
|
||||
|
||||
:::note ALL
|
||||
`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse.
|
||||
Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command.
|
||||
:::
|
||||
|
||||
## Background
|
||||
|
@ -989,6 +989,28 @@ Result
|
||||
a b
|
||||
```
|
||||
|
||||
### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
|
||||
|
||||
Allow to set default value to column when CSV field deserialization failed on bad value
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x"
|
||||
echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV"
|
||||
./clickhouse local -q "select * from test_tbl"
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
a 0 1971-01-01
|
||||
```
|
||||
|
||||
## Values format settings {#values-format-settings}
|
||||
|
||||
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
|
||||
@ -1325,6 +1347,17 @@ Default value: 0.
|
||||
|
||||
Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
|
||||
|
||||
Format:
|
||||
``` text
|
||||
http://[user:password@]machine[:port]"
|
||||
```
|
||||
|
||||
Examples:
|
||||
``` text
|
||||
http://registry.example.com:8081
|
||||
http://admin:secret@registry.example.com:8081
|
||||
```
|
||||
|
||||
Default value: `Empty`.
|
||||
|
||||
### output_format_avro_codec {#output_format_avro_codec}
|
||||
|
@ -4524,6 +4524,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta
|
||||
|
||||
### Placeholders
|
||||
|
||||
- `%a` — Full original filename (e.g., "sample.csv").
|
||||
- `%f` — Original filename without extension (e.g., "sample").
|
||||
- `%e` — Original file extension with dot (e.g., ".csv").
|
||||
- `%t` — Timestamp (in microseconds).
|
||||
|
@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
|
||||
## age
|
||||
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
|
||||
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
|
||||
|
||||
For an alternative to `age`, see function `date\_diff`.
|
||||
@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
@ -1134,6 +1138,8 @@ Result:
|
||||
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
Alias: `current_timestamp`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
@ -1264,6 +1270,8 @@ Result:
|
||||
Accepts zero arguments and returns the current date at one of the moments of query analysis.
|
||||
The same as ‘toDate(now())’.
|
||||
|
||||
Aliases: `curdate`, `current_date`.
|
||||
|
||||
## yesterday
|
||||
|
||||
Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis.
|
||||
|
@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 (#hash_functions-siphash64)
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
|
@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
||||
## firstLine
|
||||
|
||||
Returns the first line from a multi-line string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
firstLine(val)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `val` - Input value. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The first line of the input value or the whole value if there is no line
|
||||
separators. [String](../data-types/string.md)
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
select firstLine('foo\nbar\nbaz');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─firstLine('foo\nbar\nbaz')─┐
|
||||
│ foo │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i
|
||||
:::
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
|
||||
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
|
||||
|
@ -5,7 +5,27 @@ sidebar_label: WITH
|
||||
|
||||
# WITH Clause
|
||||
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
|
||||
Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case.
|
||||
|
||||
An example of such behavior is below
|
||||
``` sql
|
||||
with cte_numbers as
|
||||
(
|
||||
select
|
||||
num
|
||||
from generateRandom('num UInt64', NULL)
|
||||
limit 1000000
|
||||
)
|
||||
select
|
||||
count()
|
||||
from cte_numbers
|
||||
where num in (select num from cte_numbers)
|
||||
```
|
||||
If CTEs were to pass exactly the results and not just a piece of code, you would always see `1000000`
|
||||
|
||||
However, due to the fact that we are referring `cte_numbers` twice, random numbers are generated each time and, accordingly, we see different random results, `280501, 392454, 261636, 196227` and so on...
|
||||
|
||||
## Syntax
|
||||
|
||||
|
@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
|
||||
|
@ -4201,6 +4201,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
|
||||
### Шаблон
|
||||
Шаблон поддерживает следующие виды плейсхолдеров:
|
||||
|
||||
- `%a` — Полное исходное имя файла (например "sample.csv").
|
||||
- `%f` — Исходное имя файла без расширения (например "sample").
|
||||
- `%e` — Оригинальное расширение файла с точкой (например ".csv").
|
||||
- `%t` — Текущее время (в микросекундах).
|
||||
|
@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
|
||||
## age
|
||||
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
|
||||
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
|
||||
|
||||
**Синтаксис**
|
||||
@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
|
@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00
|
||||
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
|
||||
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
|
||||
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
|
||||
|
||||
## firstLine
|
||||
|
||||
Возвращает первую строку в многострочном тексте.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
firstLine(val)
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `val` - текст для обработки. [String](../data-types/string.md)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Первая строка текста или весь текст, если переносы строк отсутствуют.
|
||||
|
||||
Тип: [String](../data-types/string.md)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
select firstLine('foo\nbar\nbaz');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```result
|
||||
┌─firstLine('foo\nbar\nbaz')─┐
|
||||
│ foo │
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
|
||||
Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view).
|
||||
:::
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
`LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md).
|
||||
|
||||
|
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
|
||||
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
|
||||
- `?` — заменяет ровно один любой символ.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
|
||||
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
|
||||
|
||||
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
|
||||
|
@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
|
||||
可能的值:
|
||||
|
||||
- `microsecond`
|
||||
- `millisecond`
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
|
@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中
|
||||
使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ...
|
||||
```
|
||||
|
||||
实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。
|
||||
|
@ -887,6 +887,7 @@ try
|
||||
#endif
|
||||
|
||||
global_context->setRemoteHostFilter(config());
|
||||
global_context->setHTTPHeaderFilter(config());
|
||||
|
||||
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
|
||||
fs::path path = path_str;
|
||||
@ -1200,6 +1201,7 @@ try
|
||||
}
|
||||
|
||||
global_context->setRemoteHostFilter(*config);
|
||||
global_context->setHTTPHeaderFilter(*config);
|
||||
|
||||
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
|
||||
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
|
||||
|
@ -866,6 +866,14 @@
|
||||
-->
|
||||
<!--</remote_url_allow_hosts>-->
|
||||
|
||||
<!-- The list of HTTP headers forbidden to use in HTTP-related storage engines and table functions.
|
||||
If this section is not present in configuration, all headers are allowed.
|
||||
-->
|
||||
<!-- <http_forbid_headers>
|
||||
<header>exact_header</header>
|
||||
<header_regexp>(?i)(case_insensitive_header)</header_regexp>
|
||||
</http_forbid_headers> -->
|
||||
|
||||
<!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file.
|
||||
By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element.
|
||||
Values for substitutions are specified in /clickhouse/name_of_substitution elements in that file.
|
||||
|
@ -88,3 +88,4 @@ endfunction()
|
||||
|
||||
add_rust_subdirectory (BLAKE3)
|
||||
add_rust_subdirectory (skim)
|
||||
add_rust_subdirectory (prql)
|
||||
|
3
rust/prql/CMakeLists.txt
Normal file
3
rust/prql/CMakeLists.txt
Normal file
@ -0,0 +1,3 @@
|
||||
clickhouse_import_crate(MANIFEST_PATH Cargo.toml)
|
||||
target_include_directories(_ch_rust_prql INTERFACE include)
|
||||
add_library(ch_rust::prql ALIAS _ch_rust_prql)
|
569
rust/prql/Cargo.lock
generated
Normal file
569
rust/prql/Cargo.lock
generated
Normal file
@ -0,0 +1,569 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"prql-compiler",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"once_cell",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ariadne"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chumsky"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d"
|
||||
dependencies = [
|
||||
"hashbrown 0.12.3",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv"
|
||||
version = "1.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
|
||||
dependencies = [
|
||||
"csv-core",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "csv-core"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.27.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown 0.14.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.31.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"ariadne",
|
||||
"chumsky",
|
||||
"csv",
|
||||
"enum-as-inner",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sqlformat",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.166"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.100"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlformat"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
|
||||
dependencies = [
|
||||
"itertools",
|
||||
"nom",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stacker"
|
||||
version = "0.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"psm",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||
|
||||
[[package]]
|
||||
name = "unicode_categories"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe-libyaml"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
|
20
rust/prql/Cargo.toml
Normal file
20
rust/prql/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
prql-compiler = "0.8.1"
|
||||
serde_json = "1.0"
|
||||
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[profile.release-thinlto]
|
||||
inherits = "release"
|
||||
lto = true
|
18
rust/prql/include/prql.h
Normal file
18
rust/prql/include/prql.h
Normal file
@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
|
||||
/// Converts a PRQL query to an SQL query.
|
||||
/// @param query is a pointer to the beginning of the PRQL query.
|
||||
/// @param size is the size of the PRQL query.
|
||||
/// @param out is a pointer to a uint8_t pointer which will be set to the beginning of the null terminated SQL query or the error message.
|
||||
/// @param out_size is the size of the string pointed by `out`.
|
||||
/// @returns zero in case of success, non-zero in case of failure.
|
||||
int64_t prql_to_sql(const uint8_t * query, uint64_t size, uint8_t ** out, uint64_t * out_size);
|
||||
|
||||
/// Frees the passed in pointer which's memory was allocated by Rust allocators previously.
|
||||
void prql_free_pointer(uint8_t * ptr_to_free);
|
||||
|
||||
} // extern "C"
|
56
rust/prql/src/lib.rs
Normal file
56
rust/prql/src/lib.rs
Normal file
@ -0,0 +1,56 @@
|
||||
use prql_compiler::sql::Dialect;
|
||||
use prql_compiler::{Options, Target};
|
||||
use std::ffi::{c_char, CString};
|
||||
use std::slice;
|
||||
|
||||
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
|
||||
assert!(!out_size.is_null());
|
||||
let out_size_ptr = unsafe { &mut *out_size };
|
||||
*out_size_ptr = (result.len() + 1).try_into().unwrap();
|
||||
|
||||
assert!(!out.is_null());
|
||||
let out_ptr = unsafe { &mut *out };
|
||||
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn prql_to_sql(
|
||||
query: *const u8,
|
||||
size: u64,
|
||||
out: *mut *mut u8,
|
||||
out_size: *mut u64,
|
||||
) -> i64 {
|
||||
let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec();
|
||||
let maybe_prql_query = String::from_utf8(query_vec);
|
||||
if maybe_prql_query.is_err() {
|
||||
set_output(
|
||||
String::from("The PRQL query must be UTF-8 encoded!"),
|
||||
out,
|
||||
out_size,
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
let prql_query = maybe_prql_query.unwrap();
|
||||
let opts = &Options {
|
||||
format: true,
|
||||
target: Target::Sql(Some(Dialect::ClickHouse)),
|
||||
signature_comment: false,
|
||||
color: false,
|
||||
};
|
||||
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
|
||||
Ok(sql_str) => (false, sql_str),
|
||||
Err(err) => (true, err.to_string()),
|
||||
};
|
||||
|
||||
set_output(res, out, out_size);
|
||||
|
||||
match is_err {
|
||||
true => 1,
|
||||
false => 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn prql_free_pointer(ptr_to_free: *mut u8) {
|
||||
std::mem::drop(CString::from_raw(ptr_to_free as *mut c_char));
|
||||
}
|
204
rust/skim/Cargo.lock
generated
204
rust/skim/Cargo.lock
generated
@ -42,17 +42,6 @@ version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi 0.1.19",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
@ -104,31 +93,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_lex",
|
||||
"indexmap",
|
||||
"once_cell",
|
||||
"strsim",
|
||||
"termcolor",
|
||||
"textwrap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codespan-reporting"
|
||||
version = "0.11.1"
|
||||
@ -214,9 +178,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cxx"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e88abab2f5abbe4c56e8f1fb431b784d710b709888f35755a160e62e33fe38e8"
|
||||
checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cxxbridge-flags",
|
||||
@ -226,9 +190,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cxx-build"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c0c11acd0e63bae27dcd2afced407063312771212b7a823b4fd72d633be30fb"
|
||||
checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"codespan-reporting",
|
||||
@ -236,24 +200,24 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"scratch",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-flags"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d3816ed957c008ccd4728485511e3d9aaf7db419aa321e3d2c5a2f3411e36c8"
|
||||
checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13"
|
||||
|
||||
[[package]]
|
||||
name = "cxxbridge-macro"
|
||||
version = "1.0.97"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26acccf6f445af85ea056362561a24ef56cdc15fcc685f03aec50b9c702cb6d"
|
||||
checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -359,19 +323,6 @@ version = "1.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"humantime",
|
||||
"log",
|
||||
"regex",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@ -398,32 +349,11 @@ dependencies = [
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
|
||||
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
@ -454,16 +384,6 @@ version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
@ -487,9 +407,9 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "link-cplusplus"
|
||||
version = "1.0.8"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5"
|
||||
checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
@ -564,7 +484,7 @@ version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||
dependencies = [
|
||||
"hermit-abi 0.3.1",
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
@ -574,12 +494,6 @@ version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
version = "0.1.0"
|
||||
@ -588,18 +502,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
version = "1.0.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@ -648,9 +562,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.8.4"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
|
||||
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@ -659,39 +585,33 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.2"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.12"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
|
||||
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "scratch"
|
||||
version = "1.0.5"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1"
|
||||
checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.164"
|
||||
version = "1.0.171"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
|
||||
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
|
||||
|
||||
[[package]]
|
||||
name = "skim"
|
||||
@ -699,23 +619,19 @@ version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5d28de0a6cb2cdd83a076f1de9d965b973ae08b244df1aa70b432946dda0f32"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"beef",
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"clap",
|
||||
"crossbeam",
|
||||
"defer-drop",
|
||||
"derive_builder",
|
||||
"env_logger",
|
||||
"fuzzy-matcher",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"nix 0.25.1",
|
||||
"rayon",
|
||||
"regex",
|
||||
"shlex",
|
||||
"time 0.3.22",
|
||||
"time 0.3.23",
|
||||
"timer",
|
||||
"tuikit",
|
||||
"unicode-width",
|
||||
@ -741,9 +657,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
version = "2.0.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -770,30 +686,24 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.40"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
|
||||
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.40"
|
||||
version = "1.0.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
||||
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -819,9 +729,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.22"
|
||||
version = "0.3.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea9e1b3cf1243ae005d9e74085d4d542f3125458f3a81af210d901dcd7411efd"
|
||||
checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"time-core",
|
||||
@ -858,9 +768,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.9"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
|
||||
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
@ -928,7 +838,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@ -950,7 +860,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.23",
|
||||
"syn 2.0.26",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
skim = "0.10.2"
|
||||
skim = { version = "0.10.2", default-features = false }
|
||||
cxx = "1.0.83"
|
||||
term = "0.7.0"
|
||||
|
||||
|
@ -1,10 +1,25 @@
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArrayMoving.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -13,11 +28,186 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -79,7 +269,7 @@ AggregateFunctionPtr createAggregateFunctionMoving(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
|
@ -1,207 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE
|
||||
|
||||
}
|
@ -46,6 +46,7 @@
|
||||
#include <Parsers/ASTColumnDeclaration.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/Kusto/ParserKQLStatement.h>
|
||||
#include <Parsers/PRQL/ParserPRQLQuery.h>
|
||||
|
||||
#include <Processors/Formats/Impl/NullFormat.h>
|
||||
#include <Processors/Formats/IInputFormat.h>
|
||||
@ -72,6 +73,7 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "config_version.h"
|
||||
@ -338,6 +340,8 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
|
||||
|
||||
if (dialect == Dialect::kusto)
|
||||
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
|
||||
else if (dialect == Dialect::prql)
|
||||
parser = std::make_unique<ParserPRQLQuery>(max_length, settings.max_parser_depth);
|
||||
else
|
||||
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);
|
||||
|
||||
|
@ -105,6 +105,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
|
||||
|
||||
for (auto it = addresses.begin(); it != addresses.end();)
|
||||
{
|
||||
have_more_addresses_to_connect = it != std::prev(addresses.end());
|
||||
|
||||
if (connected)
|
||||
disconnect();
|
||||
|
||||
|
@ -159,6 +159,8 @@ public:
|
||||
out->setAsyncCallback(async_callback);
|
||||
}
|
||||
|
||||
bool haveMoreAddressesToConnect() const { return have_more_addresses_to_connect; }
|
||||
|
||||
private:
|
||||
String host;
|
||||
UInt16 port;
|
||||
@ -227,6 +229,8 @@ private:
|
||||
std::shared_ptr<WriteBuffer> maybe_compressed_out;
|
||||
std::unique_ptr<NativeWriter> block_out;
|
||||
|
||||
bool have_more_addresses_to_connect = false;
|
||||
|
||||
/// Logger is created lazily, for avoid to run DNS request in constructor.
|
||||
class LoggerWrapper
|
||||
{
|
||||
|
@ -179,7 +179,7 @@ bool ConnectionEstablisherAsync::checkTimeout()
|
||||
is_timeout_alarmed = true;
|
||||
}
|
||||
|
||||
if (is_timeout_alarmed && !is_socket_ready)
|
||||
if (is_timeout_alarmed && !is_socket_ready && !haveMoreAddressesToConnect())
|
||||
{
|
||||
/// In not async case timeout exception would be thrown and caught in ConnectionEstablisher::run,
|
||||
/// but in async case we process timeout outside and cannot throw exception. So, we just save fail message.
|
||||
@ -225,6 +225,11 @@ void ConnectionEstablisherAsync::resetResult()
|
||||
}
|
||||
}
|
||||
|
||||
bool ConnectionEstablisherAsync::haveMoreAddressesToConnect()
|
||||
{
|
||||
return !result.entry.isNull() && result.entry->haveMoreAddressesToConnect();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -104,6 +104,8 @@ private:
|
||||
|
||||
void resetResult();
|
||||
|
||||
bool haveMoreAddressesToConnect();
|
||||
|
||||
ConnectionEstablisher connection_establisher;
|
||||
TryResult result;
|
||||
std::string fail_message;
|
||||
|
@ -319,11 +319,9 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
throw Exception(ErrorCodes::NO_AVAILABLE_REPLICA, "Logical error: no available replica");
|
||||
|
||||
Packet packet;
|
||||
{
|
||||
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
|
||||
|
||||
try
|
||||
{
|
||||
AsyncCallbackSetter async_setter(current_connection, std::move(async_callback));
|
||||
packet = current_connection->receivePacket();
|
||||
}
|
||||
catch (Exception & e)
|
||||
@ -337,7 +335,6 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
|
@ -848,6 +848,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
|
||||
|
||||
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
|
||||
{
|
||||
if (ast == nullptr)
|
||||
return;
|
||||
|
||||
auto remove_fuzzed_table = [this](const auto & table_name)
|
||||
{
|
||||
auto pos = table_name.find("__fuzz_");
|
||||
|
@ -1,26 +1,4 @@
|
||||
#include "Allocator.h"
|
||||
|
||||
/** Keep definition of this constant in cpp file; otherwise its value
|
||||
* is inlined into allocator code making it impossible to override it
|
||||
* in third-party code.
|
||||
*
|
||||
* Note: extern may seem redundant, but is actually needed due to bug in GCC.
|
||||
* See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
|
||||
#else
|
||||
/**
|
||||
* In debug build, use small mmap threshold to reproduce more memory
|
||||
* stomping bugs. Along with ASLR it will hopefully detect more issues than
|
||||
* ASan. The program may fail due to the limit on number of memory mappings.
|
||||
*
|
||||
* Not too small to avoid too quick exhaust of memory mappings.
|
||||
*/
|
||||
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
|
||||
#endif
|
||||
|
||||
template class Allocator<false, false>;
|
||||
template class Allocator<true, false>;
|
||||
template class Allocator<false, true>;
|
||||
template class Allocator<true, true>;
|
||||
template class Allocator<false>;
|
||||
template class Allocator<true>;
|
||||
|
@ -36,51 +36,26 @@
|
||||
#include <Common/Allocator_fwd.h>
|
||||
|
||||
|
||||
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Many modern allocators (for example, tcmalloc) do not do a mremap for
|
||||
* realloc, even in case of large enough chunks of memory. Although this allows
|
||||
* you to increase performance and reduce memory consumption during realloc.
|
||||
* To fix this, we do mremap manually if the chunk of memory is large enough.
|
||||
* The threshold (64 MB) is chosen quite large, since changing the address
|
||||
* space is very slow, especially in the case of a large number of threads. We
|
||||
* expect that the set of operations mmap/something to do/mremap can only be
|
||||
* performed about 1000 times per second.
|
||||
*
|
||||
* P.S. This is also required, because tcmalloc can not allocate a chunk of
|
||||
* memory greater than 16 GB.
|
||||
*
|
||||
* P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows
|
||||
* to override it during linkage when using ClickHouse as a library in
|
||||
* third-party applications which may already use own allocator doing mmaps
|
||||
* in the implementation of alloc/realloc.
|
||||
*/
|
||||
extern const size_t MMAP_THRESHOLD;
|
||||
|
||||
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric MMappedAllocs;
|
||||
extern const Metric MMappedAllocBytes;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_MUNMAP;
|
||||
extern const int CANNOT_MREMAP;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+).
|
||||
* Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs.
|
||||
* Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb.
|
||||
* Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries.
|
||||
* That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size.
|
||||
*/
|
||||
|
||||
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
|
||||
* Also used in hash tables.
|
||||
* The interface is different from std::allocator
|
||||
@ -88,10 +63,8 @@ namespace ErrorCodes
|
||||
* - passing the size into the `free` method;
|
||||
* - by the presence of the `alignment` argument;
|
||||
* - the possibility of zeroing memory (used in hash tables);
|
||||
* - random hint address for mmap
|
||||
* - mmap_threshold for using mmap less or more
|
||||
*/
|
||||
template <bool clear_memory_, bool mmap_populate>
|
||||
template <bool clear_memory_>
|
||||
class Allocator
|
||||
{
|
||||
public:
|
||||
@ -109,7 +82,7 @@ public:
|
||||
try
|
||||
{
|
||||
checkSize(size);
|
||||
freeNoTrack(buf, size);
|
||||
freeNoTrack(buf);
|
||||
CurrentMemoryTracker::free(size);
|
||||
}
|
||||
catch (...)
|
||||
@ -132,49 +105,26 @@ public:
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
}
|
||||
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD
|
||||
&& alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
else if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
void * new_buf = ::realloc(buf, new_size);
|
||||
if (nullptr == new_buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
{
|
||||
DB::throwFromErrno(
|
||||
fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
|
||||
buf = new_buf;
|
||||
if constexpr (clear_memory)
|
||||
if (new_size > old_size)
|
||||
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
|
||||
}
|
||||
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
|
||||
{
|
||||
/// Resize mmap'd memory region.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
// On apple and freebsd self-implemented mremap used (common/mremap.h)
|
||||
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE,
|
||||
PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.",
|
||||
ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
}
|
||||
else if (new_size < MMAP_THRESHOLD)
|
||||
{
|
||||
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
void * new_buf = allocNoTrack(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
freeNoTrack(buf, old_size);
|
||||
buf = new_buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
|
||||
|
||||
void * new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
@ -192,43 +142,10 @@ protected:
|
||||
|
||||
static constexpr bool clear_memory = clear_memory_;
|
||||
|
||||
// Freshly mmapped pages are copy-on-write references to a global zero page.
|
||||
// On the first write, a page fault occurs, and an actual writable page is
|
||||
// allocated. If we are going to use this memory soon, such as when resizing
|
||||
// hash tables, it makes sense to pre-fault the pages by passing
|
||||
// MAP_POPULATE to mmap(). This takes some time, but should be faster
|
||||
// overall than having a hot loop interrupted by page faults.
|
||||
// It is only supported on Linux.
|
||||
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
|
||||
#if defined(OS_LINUX)
|
||||
| (mmap_populate ? MAP_POPULATE : 0)
|
||||
#endif
|
||||
;
|
||||
|
||||
private:
|
||||
void * allocNoTrack(size_t size, size_t alignment)
|
||||
{
|
||||
void * buf;
|
||||
size_t mmap_min_alignment = ::getPageSize();
|
||||
|
||||
if (size >= MMAP_THRESHOLD)
|
||||
{
|
||||
if (alignment > mmap_min_alignment)
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Too large alignment {}: more than page size when allocating {}.",
|
||||
ReadableSize(alignment), ReadableSize(size));
|
||||
|
||||
buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE,
|
||||
mmap_flags, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::MMappedAllocs);
|
||||
CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if constexpr (clear_memory)
|
||||
@ -251,25 +168,13 @@ private:
|
||||
if constexpr (clear_memory)
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
void freeNoTrack(void * buf, size_t size)
|
||||
{
|
||||
if (size >= MMAP_THRESHOLD)
|
||||
{
|
||||
if (0 != munmap(buf, size))
|
||||
DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP);
|
||||
|
||||
CurrentMetrics::sub(CurrentMetrics::MMappedAllocs);
|
||||
CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size);
|
||||
}
|
||||
else
|
||||
void freeNoTrack(void * buf)
|
||||
{
|
||||
::free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
void checkSize(size_t size)
|
||||
{
|
||||
@ -277,21 +182,6 @@ private:
|
||||
if (size >= 0x8000000000000000ULL)
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// In debug builds, request mmap() at random addresses (a kind of ASLR), to
|
||||
/// reproduce more memory stomping bugs. Note that Linux doesn't do it by
|
||||
/// default. This may lead to worse TLB performance.
|
||||
void * getMmapHint()
|
||||
{
|
||||
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
|
||||
}
|
||||
#else
|
||||
void * getMmapHint()
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@ -367,7 +257,5 @@ constexpr size_t allocatorInitialBytes<AllocatorWithStackMemory<
|
||||
|
||||
/// Prevent implicit template instantiation of Allocator
|
||||
|
||||
extern template class Allocator<false, false>;
|
||||
extern template class Allocator<true, false>;
|
||||
extern template class Allocator<false, true>;
|
||||
extern template class Allocator<true, true>;
|
||||
extern template class Allocator<false>;
|
||||
extern template class Allocator<true>;
|
||||
|
@ -3,7 +3,7 @@
|
||||
* This file provides forward declarations for Allocator.
|
||||
*/
|
||||
|
||||
template <bool clear_memory_, bool mmap_populate = false>
|
||||
template <bool clear_memory_>
|
||||
class Allocator;
|
||||
|
||||
template <typename Base, size_t N = 64, size_t Alignment = 1>
|
||||
|
@ -5,7 +5,6 @@ namespace DB
|
||||
|
||||
AsyncTaskExecutor::AsyncTaskExecutor(std::unique_ptr<AsyncTask> task_) : task(std::move(task_))
|
||||
{
|
||||
createFiber();
|
||||
}
|
||||
|
||||
void AsyncTaskExecutor::resume()
|
||||
@ -13,6 +12,10 @@ void AsyncTaskExecutor::resume()
|
||||
if (routine_is_finished)
|
||||
return;
|
||||
|
||||
/// Create fiber lazily on first resume() call.
|
||||
if (!fiber)
|
||||
createFiber();
|
||||
|
||||
if (!checkBeforeTaskResume())
|
||||
return;
|
||||
|
||||
@ -22,6 +25,11 @@ void AsyncTaskExecutor::resume()
|
||||
return;
|
||||
|
||||
resumeUnlocked();
|
||||
|
||||
/// Destroy fiber when it's finished.
|
||||
if (routine_is_finished)
|
||||
destroyFiber();
|
||||
|
||||
if (exception)
|
||||
processException(exception);
|
||||
}
|
||||
@ -46,9 +54,8 @@ void AsyncTaskExecutor::cancel()
|
||||
void AsyncTaskExecutor::restart()
|
||||
{
|
||||
std::lock_guard guard(fiber_lock);
|
||||
if (fiber)
|
||||
if (!routine_is_finished)
|
||||
destroyFiber();
|
||||
createFiber();
|
||||
routine_is_finished = false;
|
||||
}
|
||||
|
||||
|
@ -173,8 +173,6 @@
|
||||
M(PartsInMemory, "In-memory parts.") \
|
||||
M(MMappedFiles, "Total number of mmapped files.") \
|
||||
M(MMappedFileBytes, "Sum size of mmapped file regions.") \
|
||||
M(MMappedAllocs, "Total number of mmapped allocations") \
|
||||
M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \
|
||||
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
|
||||
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
|
||||
M(KafkaConsumers, "Number of active Kafka consumers") \
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
|
||||
|
@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
|
||||
// Define placeholders and their corresponding values
|
||||
std::map<String, String> placeholders =
|
||||
{
|
||||
{"%a", filename},
|
||||
{"%f", file_base},
|
||||
{"%e", file_ext},
|
||||
{"%t", timestamp},
|
||||
@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
|
||||
bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
|
||||
{
|
||||
// Check if the rule contains invalid placeholders
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$");
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
|
||||
if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace valid placeholders with empty strings and count remaining percentage signs.
|
||||
String replaced_rule = rule;
|
||||
boost::replace_all(replaced_rule, "%a", "");
|
||||
boost::replace_all(replaced_rule, "%f", "");
|
||||
boost::replace_all(replaced_rule, "%e", "");
|
||||
boost::replace_all(replaced_rule, "%t", "");
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
/**
|
||||
* The FileRenamer class provides functionality for renaming files based on given pattern with placeholders
|
||||
* The supported placeholders are:
|
||||
* %a - Full original file name ("sample.csv")
|
||||
* %f - Original filename without extension ("sample")
|
||||
* %e - Original file extension with dot (".csv")
|
||||
* %t - Timestamp (in microseconds)
|
||||
|
56
src/Common/HTTPHeaderFilter.cpp
Normal file
56
src/Common/HTTPHeaderFilter.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
#include <Common/HTTPHeaderFilter.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void HTTPHeaderFilter::checkHeaders(const HTTPHeaderEntries & entries) const
|
||||
{
|
||||
std::lock_guard guard(mutex);
|
||||
|
||||
for (const auto & entry : entries)
|
||||
{
|
||||
if (forbidden_headers.contains(entry.name))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
|
||||
"see <http_forbid_headers>", entry.name);
|
||||
|
||||
for (const auto & header_regex : forbidden_headers_regexp)
|
||||
if (re2::RE2::FullMatch(entry.name, header_regex))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HTTP header \"{}\" is forbidden in configuration file, "
|
||||
"see <http_forbid_headers>", entry.name);
|
||||
}
|
||||
}
|
||||
|
||||
void HTTPHeaderFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
std::lock_guard guard(mutex);
|
||||
|
||||
if (config.has("http_forbid_headers"))
|
||||
{
|
||||
std::vector<std::string> keys;
|
||||
config.keys("http_forbid_headers", keys);
|
||||
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
if (startsWith(key, "header_regexp"))
|
||||
forbidden_headers_regexp.push_back(config.getString("http_forbid_headers." + key));
|
||||
else if (startsWith(key, "header"))
|
||||
forbidden_headers.insert(config.getString("http_forbid_headers." + key));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
forbidden_headers.clear();
|
||||
forbidden_headers_regexp.clear();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
27
src/Common/HTTPHeaderFilter.h
Normal file
27
src/Common/HTTPHeaderFilter.h
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/HTTPHeaderEntries.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class HTTPHeaderFilter
|
||||
{
|
||||
public:
|
||||
|
||||
void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);
|
||||
void checkHeaders(const HTTPHeaderEntries & entries) const;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> forbidden_headers;
|
||||
std::vector<std::string> forbidden_headers_regexp;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
};
|
||||
|
||||
}
|
@ -8,7 +8,7 @@
|
||||
* table, so it makes sense to pre-fault the pages so that page faults don't
|
||||
* interrupt the resize loop. Set the allocator parameter accordingly.
|
||||
*/
|
||||
using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
|
||||
using HashTableAllocator = Allocator<true /* clear_memory */>;
|
||||
|
||||
template <size_t initial_bytes = 64>
|
||||
using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, initial_bytes>;
|
||||
|
@ -3,8 +3,10 @@
|
||||
#include "KeeperException.h"
|
||||
#include "TestKeeper.h"
|
||||
|
||||
#include <functional>
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
#include <ranges>
|
||||
#include <vector>
|
||||
|
||||
#include <Common/ZooKeeper/Types.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperCommon.h>
|
||||
@ -350,15 +352,35 @@ void ZooKeeper::createIfNotExists(const std::string & path, const std::string &
|
||||
|
||||
void ZooKeeper::createAncestors(const std::string & path)
|
||||
{
|
||||
size_t pos = 1;
|
||||
std::string data;
|
||||
std::string path_created; // Ignored
|
||||
std::vector<std::string> pending_nodes;
|
||||
|
||||
size_t last_pos = path.rfind('/');
|
||||
if (last_pos == std::string::npos || last_pos == 0)
|
||||
return;
|
||||
std::string current_node = path.substr(0, last_pos);
|
||||
|
||||
while (true)
|
||||
{
|
||||
pos = path.find('/', pos);
|
||||
if (pos == std::string::npos)
|
||||
break;
|
||||
createIfNotExists(path.substr(0, pos), "");
|
||||
++pos;
|
||||
Coordination::Error code = createImpl(current_node, data, CreateMode::Persistent, path_created);
|
||||
if (code == Coordination::Error::ZNONODE)
|
||||
{
|
||||
/// The parent node doesn't exist. Save the current node and try with the parent
|
||||
last_pos = current_node.rfind('/');
|
||||
if (last_pos == std::string::npos || last_pos == 0)
|
||||
throw KeeperException(code, path);
|
||||
pending_nodes.emplace_back(std::move(current_node));
|
||||
current_node = path.substr(0, last_pos);
|
||||
}
|
||||
else if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS)
|
||||
break;
|
||||
else
|
||||
throw KeeperException(code, path);
|
||||
}
|
||||
|
||||
for (const std::string & pending : pending_nodes | std::views::reverse)
|
||||
createIfNotExists(pending, data);
|
||||
}
|
||||
|
||||
void ZooKeeper::checkExistsAndGetCreateAncestorsOps(const std::string & path, Coordination::Requests & requests)
|
||||
|
@ -54,6 +54,7 @@
|
||||
#cmakedefine01 USE_BORINGSSL
|
||||
#cmakedefine01 USE_BLAKE3
|
||||
#cmakedefine01 USE_SKIM
|
||||
#cmakedefine01 USE_PRQL
|
||||
#cmakedefine01 USE_OPENSSL_INTREE
|
||||
#cmakedefine01 USE_ULID
|
||||
#cmakedefine01 FIU_ENABLE
|
||||
|
@ -41,9 +41,38 @@ void KeeperContext::initialize(const Poco::Util::AbstractConfiguration & config)
|
||||
initializeDisks(config);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
bool diskValidator(const Poco::Util::AbstractConfiguration & config, const std::string & disk_config_prefix)
|
||||
{
|
||||
const auto disk_type = config.getString(disk_config_prefix + ".type", "local");
|
||||
|
||||
using namespace std::literals;
|
||||
static constexpr std::array supported_disk_types
|
||||
{
|
||||
"s3"sv,
|
||||
"s3_plain"sv,
|
||||
"local"sv
|
||||
};
|
||||
|
||||
if (std::all_of(
|
||||
supported_disk_types.begin(),
|
||||
supported_disk_types.end(),
|
||||
[&](const auto supported_type) { return disk_type != supported_type; }))
|
||||
{
|
||||
LOG_INFO(&Poco::Logger::get("KeeperContext"), "Disk type '{}' is not supported for Keeper", disk_type);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void KeeperContext::initializeDisks(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance());
|
||||
disk_selector->initialize(config, "storage_configuration.disks", Context::getGlobalContextInstance(), diskValidator);
|
||||
|
||||
log_storage = getLogsPathFromConfig(config);
|
||||
|
||||
|
@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)
|
||||
|
||||
/** Components of DecimalX value:
|
||||
* whole - represents whole part of decimal, can be negative or positive.
|
||||
* fractional - for fractional part of decimal, always positive.
|
||||
* fractional - for fractional part of decimal.
|
||||
*
|
||||
* 0.123 represents 0 / 0.123
|
||||
* -0.123 represents 0 / -0.123
|
||||
* -1.123 represents -1 / 0.123
|
||||
*/
|
||||
template <typename DecimalType>
|
||||
struct DecimalComponents
|
||||
|
@ -577,6 +577,7 @@ class IColumn;
|
||||
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
|
||||
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
|
||||
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
|
||||
M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
|
||||
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
|
||||
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
|
||||
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
|
||||
@ -736,7 +737,7 @@ class IColumn;
|
||||
M(String, workload, "default", "Name of workload to be used to access resources", 0) \
|
||||
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
|
||||
\
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
\
|
||||
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
|
||||
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
|
||||
@ -774,6 +775,7 @@ class IColumn;
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
|
||||
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
@ -872,6 +874,7 @@ class IColumn;
|
||||
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
|
||||
M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
|
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
|
||||
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
|
||||
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
|
||||
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
|
||||
|
@ -138,7 +138,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"clickhouse", Dialect::clickhouse},
|
||||
{"kusto", Dialect::kusto}})
|
||||
{"kusto", Dialect::kusto},
|
||||
{"kusto", Dialect::kusto},
|
||||
{"prql", Dialect::prql}})
|
||||
// FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely?
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS,
|
||||
|
@ -207,6 +207,7 @@ enum class Dialect
|
||||
clickhouse,
|
||||
kusto,
|
||||
kusto_auto,
|
||||
prql,
|
||||
};
|
||||
|
||||
DECLARE_SETTING_ENUM(Dialect)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#if USE_MYSQL
|
||||
|
||||
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
#include <cstdlib>
|
||||
#include <random>
|
||||
#include <string_view>
|
||||
@ -151,61 +152,6 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S
|
||||
}
|
||||
}
|
||||
|
||||
static std::tuple<String, String> tryExtractTableNameFromDDL(const String & ddl)
|
||||
{
|
||||
String table_name;
|
||||
String database_name;
|
||||
if (ddl.empty()) return std::make_tuple(database_name, table_name);
|
||||
|
||||
bool parse_failed = false;
|
||||
Tokens tokens(ddl.data(), ddl.data() + ddl.size());
|
||||
IParser::Pos pos(tokens, 0);
|
||||
Expected expected;
|
||||
ASTPtr res;
|
||||
ASTPtr table;
|
||||
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected))
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF EXISTS").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("TABLE").ignore(pos, expected);
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else if (ParserKeyword("RENAME TABLE").ignore(pos, expected))
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
parse_failed = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
parse_failed = true;
|
||||
}
|
||||
if (!parse_failed)
|
||||
{
|
||||
if (auto table_id = table->as<ASTTableIdentifier>()->getTableId())
|
||||
{
|
||||
database_name = table_id.database_name;
|
||||
table_name = table_id.table_name;
|
||||
}
|
||||
}
|
||||
return std::make_tuple(database_name, table_name);
|
||||
}
|
||||
|
||||
MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
|
||||
ContextPtr context_,
|
||||
const String & database_name_,
|
||||
@ -868,14 +814,12 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
|
||||
String query = query_event.query;
|
||||
if (!materialized_tables_list.empty())
|
||||
{
|
||||
auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query);
|
||||
|
||||
if (!ddl_table_name.empty())
|
||||
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
|
||||
if (!table_id.table_name.empty())
|
||||
{
|
||||
ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name;
|
||||
if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name))
|
||||
if (table_id.database_name != mysql_database_name || !materialized_tables_list.contains(table_id.table_name))
|
||||
{
|
||||
LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query);
|
||||
LOG_DEBUG(log, "Skip MySQL DDL for {}.{}:\n{}", table_id.database_name, table_id.table_name, query);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
185
src/Databases/MySQL/tests/gtest_try_parse_table_id_from_ddl.cpp
Normal file
185
src/Databases/MySQL/tests/gtest_try_parse_table_id_from_ddl.cpp
Normal file
@ -0,0 +1,185 @@
|
||||
#include "config.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
struct ParseTableIDFromDDLTestCase
|
||||
{
|
||||
String query;
|
||||
String database_name;
|
||||
String table_name;
|
||||
|
||||
ParseTableIDFromDDLTestCase(
|
||||
const String & query_,
|
||||
const String & database_name_,
|
||||
const String & table_name_)
|
||||
: query(query_)
|
||||
, database_name(database_name_)
|
||||
, table_name(table_name_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & ostr, const ParseTableIDFromDDLTestCase & test_case)
|
||||
{
|
||||
return ostr << '"' << test_case.query << "\" extracts `" << test_case.database_name << "`.`" << test_case.table_name << "`";
|
||||
}
|
||||
|
||||
class ParseTableIDFromDDLTest : public ::testing::TestWithParam<ParseTableIDFromDDLTestCase>
|
||||
{
|
||||
};
|
||||
|
||||
TEST_P(ParseTableIDFromDDLTest, parse)
|
||||
{
|
||||
const auto & [query, expected_database_name, expected_table_name] = GetParam();
|
||||
auto table_id = tryParseTableIDFromDDL(query, "default");
|
||||
EXPECT_EQ(expected_database_name, table_id.database_name);
|
||||
EXPECT_EQ(expected_table_name, table_id.table_name);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, ParseTableIDFromDDLTest, ::testing::ValuesIn(std::initializer_list<ParseTableIDFromDDLTestCase>{
|
||||
{
|
||||
"SELECT * FROM db.table",
|
||||
"",
|
||||
""
|
||||
},
|
||||
{
|
||||
"CREATE TEMPORARY TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TEMPORARY TABLE IF NOT EXISTS db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TEMPORARY TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TEMPORARY TABLE IF NOT EXISTS table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TABLE IF NOT EXISTS db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"CREATE TABLE IF NOT EXISTS table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"ALTER TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"ALTER TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TABLE IF EXISTS db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TABLE IF EXISTS table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TEMPORARY TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TEMPORARY TABLE IF EXISTS db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TEMPORARY TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP TEMPORARY TABLE IF EXISTS table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"TRUNCATE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"TRUNCATE TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"TRUNCATE table1",
|
||||
"default",
|
||||
"table1"
|
||||
},
|
||||
{
|
||||
"TRUNCATE TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"RENAME TABLE db.table",
|
||||
"db",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"RENAME TABLE table",
|
||||
"default",
|
||||
"table"
|
||||
},
|
||||
{
|
||||
"DROP DATABASE db",
|
||||
"",
|
||||
""
|
||||
},
|
||||
{
|
||||
"DROP DATA`BASE db",
|
||||
"",
|
||||
""
|
||||
},
|
||||
{
|
||||
"NOT A SQL",
|
||||
"",
|
||||
""
|
||||
},
|
||||
|
||||
}));
|
44
src/Databases/MySQL/tryParseTableIDFromDDL.cpp
Normal file
44
src/Databases/MySQL/tryParseTableIDFromDDL.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/CommonParsers.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name)
|
||||
{
|
||||
bool is_ddl = false;
|
||||
Tokens tokens(query.data(), query.data() + query.size());
|
||||
IParser::Pos pos(tokens, 0);
|
||||
Expected expected;
|
||||
if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF NOT EXISTS").ignore(pos, expected);
|
||||
is_ddl = true;
|
||||
}
|
||||
else if (ParserKeyword("ALTER TABLE").ignore(pos, expected) || ParserKeyword("RENAME TABLE").ignore(pos, expected))
|
||||
{
|
||||
is_ddl = true;
|
||||
}
|
||||
else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("IF EXISTS").ignore(pos, expected);
|
||||
is_ddl = true;
|
||||
}
|
||||
else if (ParserKeyword("TRUNCATE").ignore(pos, expected))
|
||||
{
|
||||
ParserKeyword("TABLE").ignore(pos, expected);
|
||||
is_ddl = true;
|
||||
}
|
||||
|
||||
ASTPtr table;
|
||||
if (!is_ddl || !ParserCompoundIdentifier(true).parse(pos, table, expected))
|
||||
return StorageID::createEmpty();
|
||||
auto table_id = table->as<ASTTableIdentifier>()->getTableId();
|
||||
if (table_id.database_name.empty())
|
||||
table_id.database_name = default_database_name;
|
||||
return table_id;
|
||||
}
|
||||
|
||||
}
|
11
src/Databases/MySQL/tryParseTableIDFromDDL.h
Normal file
11
src/Databases/MySQL/tryParseTableIDFromDDL.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
StorageID tryParseTableIDFromDDL(const String & query, const String & default_database_name);
|
||||
|
||||
}
|
@ -257,7 +257,6 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
|
||||
|
||||
const auto & headers_prefix = settings_config_prefix + ".headers";
|
||||
|
||||
|
||||
if (config.has(headers_prefix))
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys config_keys;
|
||||
@ -297,7 +296,10 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
|
||||
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
|
||||
|
||||
if (created_from_ddl)
|
||||
{
|
||||
context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
|
||||
context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
|
||||
}
|
||||
|
||||
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
|
||||
};
|
||||
|
@ -27,7 +27,7 @@ void DiskSelector::assertInitialized() const
|
||||
}
|
||||
|
||||
|
||||
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
|
||||
void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator)
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(config_prefix, keys);
|
||||
@ -46,6 +46,9 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
|
||||
|
||||
auto disk_config_prefix = config_prefix + "." + disk_name;
|
||||
|
||||
if (disk_validator && !disk_validator(config, disk_config_prefix))
|
||||
continue;
|
||||
|
||||
disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks));
|
||||
}
|
||||
if (!has_default_disk)
|
||||
|
@ -23,7 +23,8 @@ public:
|
||||
DiskSelector() = default;
|
||||
DiskSelector(const DiskSelector & from) = default;
|
||||
|
||||
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
|
||||
using DiskValidator = std::function<bool(const Poco::Util::AbstractConfiguration & config, const String & disk_config_prefix)>;
|
||||
void initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator = {});
|
||||
|
||||
DiskSelectorPtr updateFromConfig(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
|
@ -23,10 +23,6 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_)
|
||||
: TemporaryFileOnDisk(disk_, "")
|
||||
{}
|
||||
|
||||
TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope)
|
||||
: TemporaryFileOnDisk(disk_)
|
||||
{
|
||||
|
@ -16,9 +16,8 @@ using DiskPtr = std::shared_ptr<IDisk>;
|
||||
class TemporaryFileOnDisk
|
||||
{
|
||||
public:
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix);
|
||||
explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix = "tmp");
|
||||
|
||||
~TemporaryFileOnDisk();
|
||||
|
||||
|
@ -73,6 +73,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
|
||||
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
|
||||
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
|
||||
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
|
@ -152,6 +152,7 @@ struct FormatSettings
|
||||
bool trim_whitespaces = true;
|
||||
bool allow_whitespace_or_tab_as_delimiter = false;
|
||||
bool allow_variable_number_of_columns = false;
|
||||
bool use_default_on_bad_values = false;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
|
@ -19,6 +19,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr auto microsecond_multiplier = 1000000;
|
||||
static constexpr auto millisecond_multiplier = 1000;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -1377,6 +1380,36 @@ struct ToRelativeSecondNumImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
template <Int64 scale_multiplier>
|
||||
struct ToRelativeSubsecondNumImpl
|
||||
{
|
||||
static constexpr auto name = "toRelativeSubsecondNumImpl";
|
||||
|
||||
static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
|
||||
{
|
||||
static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
|
||||
if (scale == scale_multiplier)
|
||||
return t.value;
|
||||
if (scale > scale_multiplier)
|
||||
return t.value / (scale / scale_multiplier);
|
||||
return t.value * (scale_multiplier / scale);
|
||||
}
|
||||
static inline Int64 execute(UInt32 t, const DateLUTImpl &)
|
||||
{
|
||||
return t * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToYYYYMMImpl
|
||||
{
|
||||
static constexpr auto name = "toYYYYMM";
|
||||
@ -1476,25 +1509,47 @@ struct ToYYYYMMDDhhmmssImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
|
||||
{
|
||||
UInt16 millisecond;
|
||||
UInt16 microsecond;
|
||||
};
|
||||
|
||||
struct ToDateTimeComponentsImpl
|
||||
{
|
||||
static constexpr auto name = "toDateTimeComponents";
|
||||
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(t);
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
|
||||
if (t.value < 0 && components.fractional)
|
||||
{
|
||||
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
|
||||
--components.whole;
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t));
|
||||
Int64 fractional = components.fractional;
|
||||
if (scale_multiplier > microsecond_multiplier)
|
||||
fractional = fractional / (scale_multiplier / microsecond_multiplier);
|
||||
else if (scale_multiplier < microsecond_multiplier)
|
||||
fractional = fractional * (microsecond_multiplier / scale_multiplier);
|
||||
|
||||
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
|
||||
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
|
||||
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(ExtendedDayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
|
||||
}
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return time_zone.toDateTimeComponents(DayNum(d));
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
|
||||
}
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
|
@ -1112,6 +1112,11 @@ private:
|
||||
bool c0_const = isColumnConst(*c0);
|
||||
bool c1_const = isColumnConst(*c1);
|
||||
|
||||
/// This is a paranoid check to protect from a broken query analysis.
|
||||
if (c0->isNullable() != c1->isNullable())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error: columns are assumed to be of identical types, but they are different in Nullable");
|
||||
|
||||
if (c0_const && c1_const)
|
||||
{
|
||||
UInt8 res = 0;
|
||||
|
@ -79,28 +79,51 @@ namespace impl
|
||||
UInt64 key1 = 0;
|
||||
};
|
||||
|
||||
static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
|
||||
struct SipHashKeyColumns
|
||||
{
|
||||
SipHashKey ret{};
|
||||
ColumnPtr key0;
|
||||
ColumnPtr key1;
|
||||
bool is_const;
|
||||
|
||||
const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
|
||||
size_t size() const
|
||||
{
|
||||
assert(key0 && key1);
|
||||
assert(key0->size() == key1->size());
|
||||
return key0->size();
|
||||
}
|
||||
SipHashKey getKey(size_t i) const
|
||||
{
|
||||
if (is_const)
|
||||
i = 0;
|
||||
const auto & key0data = assert_cast<const ColumnUInt64 &>(*key0).getData();
|
||||
const auto & key1data = assert_cast<const ColumnUInt64 &>(*key1).getData();
|
||||
return {key0data[i], key1data[i]};
|
||||
}
|
||||
};
|
||||
|
||||
static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key)
|
||||
{
|
||||
const ColumnTuple * tuple = nullptr;
|
||||
const auto * column = key.column.get();
|
||||
bool is_const = false;
|
||||
if (isColumnConst(*column))
|
||||
{
|
||||
is_const = true;
|
||||
tuple = checkAndGetColumnConstData<ColumnTuple>(column);
|
||||
}
|
||||
else
|
||||
tuple = checkAndGetColumn<ColumnTuple>(column);
|
||||
if (!tuple)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple");
|
||||
|
||||
if (tuple->tupleSize() != 2)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");
|
||||
|
||||
if (tuple->empty())
|
||||
return ret;
|
||||
|
||||
if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
|
||||
ret.key0 = key0col->get64(0);
|
||||
else
|
||||
SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const};
|
||||
assert(ret.key0);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key0))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64");
|
||||
|
||||
if (const auto * key1col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(1))))
|
||||
ret.key1 = key1col->get64(0);
|
||||
else
|
||||
assert(ret.key1);
|
||||
if (!checkColumn<ColumnUInt64>(*ret.key1))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64");
|
||||
|
||||
return ret;
|
||||
@ -329,8 +352,10 @@ struct SipHash64KeyedImpl
|
||||
static constexpr auto name = "sipHash64Keyed";
|
||||
using ReturnType = UInt64;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -371,8 +396,10 @@ struct SipHash128KeyedImpl
|
||||
static constexpr auto name = "sipHash128Keyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); }
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); }
|
||||
|
||||
@ -398,13 +425,43 @@ struct SipHash128ReferenceImpl
|
||||
|
||||
using ReturnType = UInt128;
|
||||
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2); }
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc<UInt128, SipHash128ReferenceImpl>(h1, h2); }
|
||||
|
||||
static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); }
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
struct SipHash128ReferenceKeyedImpl
|
||||
{
|
||||
static constexpr auto name = "sipHash128ReferenceKeyed";
|
||||
using ReturnType = UInt128;
|
||||
using Key = impl::SipHashKey;
|
||||
using KeyColumns = impl::SipHashKeyColumns;
|
||||
|
||||
static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); }
|
||||
static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); }
|
||||
|
||||
static UInt128 applyKeyed(const Key & key, const char * begin, size_t size)
|
||||
{
|
||||
return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size);
|
||||
}
|
||||
|
||||
static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2)
|
||||
{
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
UInt128 tmp;
|
||||
reverseMemcpy(&tmp, &h1, sizeof(UInt128));
|
||||
h1 = tmp;
|
||||
reverseMemcpy(&tmp, &h2, sizeof(UInt128));
|
||||
h2 = tmp;
|
||||
#endif
|
||||
UInt128 hashes[] = {h1, h2};
|
||||
return applyKeyed(key, reinterpret_cast<const char *>(hashes), 2 * sizeof(UInt128));
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/** Why we need MurmurHash2?
|
||||
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
|
||||
@ -1023,7 +1080,7 @@ private:
|
||||
|
||||
DECLARE_MULTITARGET_CODE(
|
||||
|
||||
template <typename Impl, bool Keyed, typename KeyType>
|
||||
template <typename Impl, bool Keyed, typename KeyType, typename KeyColumnsType>
|
||||
class FunctionAnyHash : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -1033,9 +1090,12 @@ private:
|
||||
using ToType = typename Impl::ReturnType;
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1044,6 +1104,9 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
|
||||
if constexpr (Impl::use_int_hash_for_pods)
|
||||
{
|
||||
@ -1077,6 +1140,14 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
ToType hash;
|
||||
|
||||
@ -1107,18 +1178,28 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
column->getName(), getName());
|
||||
}
|
||||
|
||||
template <typename FromType, bool first>
|
||||
void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<FromType>;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
|
||||
if (const ColVecType * col_from = checkAndGetColumn<ColVecType>(column))
|
||||
{
|
||||
@ -1127,6 +1208,9 @@ private:
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ToType hash;
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
hash = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
else
|
||||
@ -1143,6 +1227,14 @@ private:
|
||||
}
|
||||
else if (auto col_from_const = checkAndGetColumnConst<ColVecType>(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeBigIntType<FromType, first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
auto value = col_from_const->template getValue<FromType>();
|
||||
|
||||
ToType hash;
|
||||
@ -1158,19 +1250,32 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
|
||||
column->getName(), getName());
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0, size = column->size(); i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
StringRef bytes = column->getDataAt(i);
|
||||
const ToType hash = apply(key, bytes.data, bytes.size);
|
||||
if constexpr (first)
|
||||
@ -1181,8 +1286,11 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeString(const KeyType & key, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
if (const ColumnString * col_from = checkAndGetColumn<ColumnString>(column))
|
||||
{
|
||||
const typename ColumnString::Chars & data = col_from->getChars();
|
||||
@ -1192,6 +1300,9 @@ private:
|
||||
ColumnString::Offset current_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key,
|
||||
reinterpret_cast<const char *>(&data[current_offset]),
|
||||
offsets[i] - current_offset - 1);
|
||||
@ -1212,6 +1323,9 @@ private:
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
const ToType hash = apply(key, reinterpret_cast<const char *>(&data[i * n]), n);
|
||||
if constexpr (first)
|
||||
vec_to[i] = hash;
|
||||
@ -1221,6 +1335,14 @@ private:
|
||||
}
|
||||
else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column))
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
{
|
||||
if (!key_cols.is_const)
|
||||
{
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
return executeString<first>(key_cols, full_column.get(), vec_to);
|
||||
}
|
||||
}
|
||||
String value = col_from_const->getValue<String>();
|
||||
const ToType hash = apply(key, value.data(), value.size());
|
||||
const size_t size = vec_to.size();
|
||||
@ -1228,16 +1350,23 @@ private:
|
||||
if constexpr (first)
|
||||
vec_to.assign(size, hash);
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
vec_to[i] = combineHashes(key, vec_to[i], hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
column->getName(), getName());
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeArray(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
const IDataType * nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType().get();
|
||||
|
||||
@ -1249,13 +1378,19 @@ private:
|
||||
|
||||
typename ColumnVector<ToType>::Container vec_temp(nested_size);
|
||||
bool nested_is_first = true;
|
||||
executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first);
|
||||
|
||||
const size_t size = offsets.size();
|
||||
|
||||
ColumnArray::Offset current_offset = 0;
|
||||
KeyType key{};
|
||||
if constexpr (Keyed)
|
||||
key = Impl::getKey(key_cols, 0);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if constexpr (Keyed)
|
||||
if (!key_cols.is_const && i != 0)
|
||||
key = Impl::getKey(key_cols, i);
|
||||
ColumnArray::Offset next_offset = offsets[i];
|
||||
|
||||
ToType hash;
|
||||
@ -1279,7 +1414,7 @@ private:
|
||||
{
|
||||
/// NOTE: here, of course, you can do without the materialization of the column.
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
executeArray<first>(key, type, full_column.get(), vec_to);
|
||||
executeArray<first>(key_cols, type, full_column.get(), vec_to);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1287,7 +1422,7 @@ private:
|
||||
}
|
||||
|
||||
template <bool first>
|
||||
void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector<ToType>::Container & vec_to) const
|
||||
{
|
||||
WhichDataType which(from_type);
|
||||
|
||||
@ -1295,40 +1430,45 @@ private:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
|
||||
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
if constexpr (Keyed)
|
||||
if ((!key_cols.is_const && key_cols.size() != vec_to.size())
|
||||
|| (key_cols.is_const && key_cols.size() != 1))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt64()) executeIntType<UInt64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt128()) executeBigIntType<UInt128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUInt256()) executeBigIntType<UInt256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt64()) executeIntType<Int64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt128()) executeBigIntType<Int128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isInt256()) executeBigIntType<Int256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isUUID()) executeBigIntType<UUID, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv4()) executeIntType<IPv4, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isIPv6()) executeBigIntType<IPv6, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum8()) executeIntType<Int8, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isEnum16()) executeIntType<Int16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate()) executeIntType<UInt16, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDate32()) executeIntType<Int32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDateTime()) executeIntType<UInt32, first>(key_cols, icolumn, vec_to);
|
||||
/// TODO: executeIntType() for Decimal32/64 leads to incompatible result
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key, icolumn, vec_to);
|
||||
else if (which.isDecimal32()) executeBigIntType<Decimal32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal64()) executeBigIntType<Decimal64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal128()) executeBigIntType<Decimal128, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isDecimal256()) executeBigIntType<Decimal256, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat32()) executeIntType<Float32, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFloat64()) executeIntType<Float64, first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isFixedString()) executeString<first>(key_cols, icolumn, vec_to);
|
||||
else if (which.isArray()) executeArray<first>(key_cols, from_type, icolumn, vec_to);
|
||||
else executeGeneric<first>(key_cols, icolumn, vec_to);
|
||||
}
|
||||
|
||||
void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector<ToType>::Container & vec_to, bool & is_first) const
|
||||
{
|
||||
/// Flattening of tuples.
|
||||
if (const ColumnTuple * tuple = typeid_cast<const ColumnTuple *>(column))
|
||||
@ -1337,7 +1477,7 @@ private:
|
||||
const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(*type).getElements();
|
||||
size_t tuple_size = tuple_columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first);
|
||||
}
|
||||
else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData<ColumnTuple>(column))
|
||||
{
|
||||
@ -1347,24 +1487,24 @@ private:
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
auto tmp = ColumnConst::create(tuple_columns[i], column->size());
|
||||
executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first);
|
||||
}
|
||||
}
|
||||
else if (const auto * map = checkAndGetColumn<ColumnMap>(column))
|
||||
{
|
||||
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
|
||||
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
}
|
||||
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
|
||||
{
|
||||
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_first)
|
||||
executeAny<true>(key, type, column, vec_to);
|
||||
executeAny<true>(key_cols, type, column, vec_to);
|
||||
else
|
||||
executeAny<false>(key, type, column, vec_to);
|
||||
executeAny<false>(key_cols, type, column, vec_to);
|
||||
}
|
||||
|
||||
is_first = false;
|
||||
@ -1395,6 +1535,8 @@ public:
|
||||
{
|
||||
auto col_to = ColumnVector<ToType>::create(input_rows_count);
|
||||
|
||||
if (input_rows_count != 0)
|
||||
{
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
|
||||
/// If using a "keyed" algorithm, the first argument is the key and
|
||||
@ -1408,17 +1550,18 @@ public:
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
|
||||
KeyType key{};
|
||||
KeyColumnsType key_cols{};
|
||||
if constexpr (Keyed)
|
||||
if (!arguments.empty())
|
||||
key = Impl::parseKey(arguments[0]);
|
||||
key_cols = Impl::parseKeyColumns(arguments[0]);
|
||||
|
||||
/// The function supports arbitrary number of arguments of arbitrary types.
|
||||
bool is_first_argument = true;
|
||||
for (size_t i = first_data_argument; i < arguments.size(); ++i)
|
||||
{
|
||||
const auto & col = arguments[i];
|
||||
executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
|
||||
@ -1450,17 +1593,19 @@ public:
|
||||
|
||||
) // DECLARE_MULTITARGET_CODE
|
||||
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>
|
||||
template <typename Impl, bool Keyed = false, typename KeyType = char, typename KeyColumnsType = char>
|
||||
class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>
|
||||
{
|
||||
public:
|
||||
explicit FunctionAnyHash(ContextPtr context) : selector(context)
|
||||
{
|
||||
selector.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::Default, TargetSpecific::Default::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
|
||||
#if USE_MULTITARGET_CODE
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType>>();
|
||||
selector.registerImplementation<TargetArch::AVX2, TargetSpecific::AVX2::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
selector
|
||||
.registerImplementation<TargetArch::AVX512F, TargetSpecific::AVX512F::FunctionAnyHash<Impl, Keyed, KeyType, KeyColumnsType>>();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1696,7 +1841,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
|
||||
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key>;
|
||||
using FunctionSipHash64Keyed = FunctionAnyHash<SipHash64KeyedImpl, true, SipHash64KeyedImpl::Key, SipHash64KeyedImpl::KeyColumns>;
|
||||
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
||||
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
|
||||
#if USE_SSL
|
||||
@ -1710,8 +1855,10 @@ using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
|
||||
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
|
||||
#endif
|
||||
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key>;
|
||||
using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
|
||||
using FunctionSipHash128Reference = FunctionAnyHash<SipHash128ReferenceImpl>;
|
||||
using FunctionSipHash128ReferenceKeyed
|
||||
= FunctionAnyHash<SipHash128ReferenceKeyedImpl, true, SipHash128ReferenceKeyedImpl::Key, SipHash128ReferenceKeyedImpl::KeyColumns>;
|
||||
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
|
||||
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
|
||||
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
|
||||
|
@ -20,6 +20,11 @@ REGISTER_FUNCTION(Hashing)
|
||||
.examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}},
|
||||
.categories{"Hash"}
|
||||
});
|
||||
factory.registerFunction<FunctionSipHash128ReferenceKeyed>(FunctionDocumentation{
|
||||
.description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument "
|
||||
"instead of using a fixed key.",
|
||||
.examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}},
|
||||
.categories{"Hash"}});
|
||||
factory.registerFunction<FunctionCityHash64>();
|
||||
factory.registerFunction<FunctionFarmFingerprint64>();
|
||||
factory.registerFunction<FunctionFarmHash64>();
|
||||
|
@ -39,6 +39,9 @@ struct HasTokenImpl
|
||||
if (start_pos != nullptr)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name);
|
||||
|
||||
if (pattern.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token");
|
||||
|
||||
if (haystack_offsets.empty())
|
||||
return;
|
||||
|
||||
|
@ -7,8 +7,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** URL processing functions. See implementation in separate .cpp files.
|
||||
* All functions are not strictly follow RFC, instead they are maximally simplified for performance reasons.
|
||||
/** These helpers are used by URL processing functions. See implementation in separate .cpp files.
|
||||
* All functions do not strictly follow RFC, instead they are maximally simplified for performance reasons.
|
||||
*
|
||||
* Functions for extraction parts of URL.
|
||||
* If URL has nothing like, then empty string is returned.
|
||||
@ -101,7 +101,7 @@ struct ExtractSubstringImpl
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
|
||||
}
|
||||
};
|
||||
|
||||
@ -156,7 +156,7 @@ struct CutSubstringImpl
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions");
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
|
||||
}
|
||||
};
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
*
|
||||
* Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
|
||||
* invokes Transform::execute() with either:
|
||||
@ -80,7 +80,10 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
if (t.value < 0 && components.fractional)
|
||||
--components.whole;
|
||||
|
||||
return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include "FunctionsURL.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -154,4 +154,3 @@ REGISTER_FUNCTION(Netloc)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include "FunctionsURL.h"
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include "path.h"
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/URL/FunctionsURL.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include "FunctionsURL.h"
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include "path.h"
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -54,4 +54,3 @@ struct ExtractProtocol
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -1,7 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "FunctionsURL.h"
|
||||
#include <base/find_symbols.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -34,4 +34,3 @@ struct ExtractQueryStringAndFragment
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -174,12 +174,13 @@ public:
|
||||
{
|
||||
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
|
||||
- static_cast<Int64>(transform_x.execute(x, timezone_x));
|
||||
DateLUTImpl::DateTimeComponents a_comp;
|
||||
DateLUTImpl::DateTimeComponents b_comp;
|
||||
DateTimeComponentsWithFractionalPart a_comp;
|
||||
DateTimeComponentsWithFractionalPart b_comp;
|
||||
Int64 adjust_value;
|
||||
auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
if (x_seconds <= y_seconds)
|
||||
auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
|
||||
if (x_microseconds <= y_microseconds)
|
||||
{
|
||||
a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
@ -192,14 +193,16 @@ public:
|
||||
adjust_value = 1;
|
||||
}
|
||||
|
||||
|
||||
if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.date.month > b_comp.date.month)
|
||||
|| ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -210,8 +213,9 @@ public:
|
||||
|| ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -219,8 +223,9 @@ public:
|
||||
if ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -230,25 +235,44 @@ public:
|
||||
if ((x_day_of_week > y_day_of_week)
|
||||
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if (a_comp.time.second > b_comp.time.second)
|
||||
if ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
|
||||
{
|
||||
if (a_comp.microsecond > b_comp.microsecond)
|
||||
res += adjust_value;
|
||||
}
|
||||
return res;
|
||||
@ -373,6 +397,10 @@ public:
|
||||
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "second" || unit == "ss" || unit == "s")
|
||||
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "millisecond" || unit == "ms")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "microsecond" || unit == "us" || unit == "u")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Function {} does not support '{}' unit", getName(), unit);
|
||||
|
42
src/Functions/firstLine.cpp
Normal file
42
src/Functions/firstLine.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/StringHelpers.h>
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct FirstLine
|
||||
{
|
||||
static size_t getReserveLengthForElement() { return 16; }
|
||||
|
||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||
{
|
||||
res_data = data;
|
||||
|
||||
const Pos end = data + size;
|
||||
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
|
||||
res_size = pos - data;
|
||||
}
|
||||
};
|
||||
|
||||
struct NameFirstLine
|
||||
{
|
||||
static constexpr auto name = "firstLine";
|
||||
};
|
||||
|
||||
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;
|
||||
|
||||
REGISTER_FUNCTION(FirstLine)
|
||||
{
|
||||
factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
|
||||
.description = "Returns first line of a multi-line string.",
|
||||
.syntax = "firstLine(string)",
|
||||
.arguments = {{.name = "string", .description = "The string to process."}},
|
||||
.returned_value = {"The first line of the string or the whole string if there is no line separators."},
|
||||
.examples = {
|
||||
{.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
|
||||
{.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
|
||||
}});
|
||||
}
|
||||
}
|
@ -20,6 +20,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -108,6 +109,12 @@ public:
|
||||
|
||||
/// S2 acceptes point as (latitude, longitude)
|
||||
S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon);
|
||||
|
||||
if (!lat_lng.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive"
|
||||
"and the longitude is between -180 and 180 degrees inclusive.");
|
||||
|
||||
S2CellId id(lat_lng);
|
||||
|
||||
dst_data[row] = id.id();
|
||||
|
@ -119,7 +119,7 @@ public:
|
||||
|
||||
if (!lhs_array->hasEqualOffsets(*rhs_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array offsets",
|
||||
"The argument 2 and argument {} of function {} have different array offsets",
|
||||
i + 1,
|
||||
getName());
|
||||
|
||||
|
@ -138,6 +138,7 @@ private:
|
||||
REGISTER_FUNCTION(Now)
|
||||
{
|
||||
factory.registerFunction<NowOverloadResolver>({}, FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("current_timestamp", NowOverloadResolver::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -114,13 +114,18 @@ public:
|
||||
const auto hi = S2CellId(data_hi[row]);
|
||||
const auto point = S2CellId(data_point[row]);
|
||||
|
||||
if (!lo.is_valid() || !hi.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
|
||||
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
|
||||
|
||||
if (!point.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
|
||||
"and the longitude is between -180 and 180 degrees inclusive.");
|
||||
|
||||
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
|
||||
if (!rect.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
|
||||
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
|
||||
"Also, if either the latitude or longitude bound is empty then both must be. ");
|
||||
|
||||
rect.AddPoint(point.ToPoint());
|
||||
|
||||
|
@ -107,13 +107,18 @@ public:
|
||||
const auto hi = S2CellId(data_hi[row]);
|
||||
const auto point = S2CellId(data_point[row]);
|
||||
|
||||
if (!lo.is_valid() || !hi.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid");
|
||||
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
|
||||
|
||||
if (!point.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Point is invalid. For valid point the latitude is between -90 and 90 degrees inclusive "
|
||||
"and the longitude is between -180 and 180 degrees inclusive.");
|
||||
|
||||
S2LatLngRect rect(lo.ToLatLng(), hi.ToLatLng());
|
||||
if (!rect.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
|
||||
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
|
||||
"Also, if either the latitude or longitude bound is empty then both must be. ");
|
||||
|
||||
dst_data.emplace_back(rect.Contains(point.ToLatLng()));
|
||||
}
|
||||
|
@ -128,15 +128,15 @@ public:
|
||||
const auto lo2 = S2CellId(data_lo2[row]);
|
||||
const auto hi2 = S2CellId(data_hi2[row]);
|
||||
|
||||
if (!lo1.is_valid() || !hi1.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
|
||||
|
||||
if (!lo2.is_valid() || !hi2.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
|
||||
|
||||
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
|
||||
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
|
||||
|
||||
if (!rect1.is_valid() || !rect2.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
|
||||
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
|
||||
"Also, if either the latitude or longitude bound is empty then both must be.");
|
||||
|
||||
S2LatLngRect rect_intersection = rect1.Intersection(rect2);
|
||||
|
||||
vec_res_first.emplace_back(S2CellId(rect_intersection.lo()).id());
|
||||
|
@ -126,15 +126,15 @@ public:
|
||||
const auto lo2 = S2CellId(data_lo2[row]);
|
||||
const auto hi2 = S2CellId(data_hi2[row]);
|
||||
|
||||
if (!lo1.is_valid() || !hi1.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid");
|
||||
|
||||
if (!lo2.is_valid() || !hi2.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second rectangle is not valid");
|
||||
|
||||
S2LatLngRect rect1(lo1.ToLatLng(), hi1.ToLatLng());
|
||||
S2LatLngRect rect2(lo2.ToLatLng(), hi2.ToLatLng());
|
||||
|
||||
if (!rect1.is_valid() || !rect2.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Rectangle is invalid. For valid rectangles the latitude bounds do not exceed "
|
||||
"Pi/2 in absolute value and the longitude bounds do not exceed Pi in absolute value. "
|
||||
"Also, if either the latitude or longitude bound is empty then both must be. ");
|
||||
|
||||
S2LatLngRect rect_union = rect1.Union(rect2);
|
||||
|
||||
vec_res_first.emplace_back(S2CellId(rect_union.lo()).id());
|
||||
|
@ -97,7 +97,7 @@ public:
|
||||
const auto id = S2CellId(data_id[row]);
|
||||
|
||||
if (!id.is_valid())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "CellId is invalid.");
|
||||
|
||||
S2Point point = id.ToPoint();
|
||||
S2LatLng ll(point);
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <Functions/DateTimeTransforms.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/TransformDateTime64.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
|
@ -86,6 +86,8 @@ public:
|
||||
REGISTER_FUNCTION(Today)
|
||||
{
|
||||
factory.registerFunction<TodayOverloadResolver>();
|
||||
factory.registerAlias("current_date", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("curdate", TodayOverloadResolver::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -16,19 +16,15 @@
|
||||
|
||||
#include <DataTypes/DataTypeSet.h>
|
||||
#include <DataTypes/DataTypeFunction.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnSet.h>
|
||||
|
||||
#include <Storages/StorageSet.h>
|
||||
@ -47,7 +43,6 @@
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Interpreters/ActionsVisitor.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Interpreters/Set.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
@ -61,6 +56,7 @@
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -715,7 +711,7 @@ bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child)
|
||||
node->as<ASTExpressionList>())
|
||||
return false;
|
||||
|
||||
/// Do not go to FROM, JOIN, UNION.
|
||||
/// Do not go to FROM, JOIN, UNION
|
||||
if (child->as<ASTTableExpression>() ||
|
||||
child->as<ASTSelectQuery>())
|
||||
return false;
|
||||
|
@ -97,6 +97,10 @@ UInt128 AsynchronousInsertQueue::InsertQuery::calculateHash() const
|
||||
|
||||
for (const auto & setting : settings.allChanged())
|
||||
{
|
||||
/// We don't consider this setting because it is only for deduplication,
|
||||
/// which means we can put two inserts with different tokens in the same block safely.
|
||||
if (setting.getName() == "insert_deduplication_token")
|
||||
continue;
|
||||
siphash.update(setting.getName());
|
||||
applyVisitor(FieldVisitorHash(siphash), setting.getValue());
|
||||
}
|
||||
@ -111,9 +115,10 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
|
||||
return query_str == other.query_str && settings == other.settings;
|
||||
}
|
||||
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, const String & async_dedup_token_, MemoryTracker * user_memory_tracker_)
|
||||
: bytes(std::move(bytes_))
|
||||
, query_id(std::move(query_id_))
|
||||
, async_dedup_token(async_dedup_token_)
|
||||
, user_memory_tracker(user_memory_tracker_)
|
||||
, create_time(std::chrono::system_clock::now())
|
||||
{
|
||||
@ -227,7 +232,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
|
||||
/// to avoid buffering of huge amount of data in memory.
|
||||
|
||||
auto read_buf = getReadBufferFromASTInsertQuery(query);
|
||||
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {});
|
||||
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});
|
||||
|
||||
WriteBufferFromString write_buf(bytes);
|
||||
copyData(limit_buf, write_buf);
|
||||
@ -253,7 +258,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
|
||||
if (auto quota = query_context->getQuota())
|
||||
quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
|
||||
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), settings.insert_deduplication_token, CurrentThread::getUserMemoryTracker());
|
||||
|
||||
InsertQuery key{query, settings};
|
||||
InsertDataPtr data_to_process;
|
||||
@ -515,7 +520,7 @@ try
|
||||
|
||||
StreamingFormatExecutor executor(header, format, std::move(on_error), std::move(adding_defaults_transform));
|
||||
std::unique_ptr<ReadBuffer> last_buffer;
|
||||
auto chunk_info = std::make_shared<ChunkOffsets>();
|
||||
auto chunk_info = std::make_shared<AsyncInsertInfo>();
|
||||
for (const auto & entry : data->entries)
|
||||
{
|
||||
auto buffer = std::make_unique<ReadBufferFromString>(entry->bytes);
|
||||
@ -524,6 +529,7 @@ try
|
||||
size_t num_rows = executor.execute(*buffer);
|
||||
total_rows += num_rows;
|
||||
chunk_info->offsets.push_back(total_rows);
|
||||
chunk_info->tokens.push_back(entry->async_dedup_token);
|
||||
|
||||
/// Keep buffer, because it still can be used
|
||||
/// in destructor, while resetting buffer at next iteration.
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user