mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge remote-tracking branch 'origin/master' into clickhouse-help
This commit is contained in:
commit
1baa15d603
69
.github/workflows/backport_branches.yml
vendored
69
.github/workflows/backport_branches.yml
vendored
@ -512,6 +512,75 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
#################################### INSTALL PACKAGES ######################################
|
||||
############################################################################################
|
||||
InstallPackagesTestRelease:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (amd64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
InstallPackagesTestAarch64:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (arm64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAl STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
|
69
.github/workflows/master.yml
vendored
69
.github/workflows/master.yml
vendored
@ -946,6 +946,75 @@ jobs:
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 mark_release_ready.py
|
||||
############################################################################################
|
||||
#################################### INSTALL PACKAGES ######################################
|
||||
############################################################################################
|
||||
InstallPackagesTestRelease:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (amd64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
InstallPackagesTestAarch64:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (arm64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAl STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
|
69
.github/workflows/pull_request.yml
vendored
69
.github/workflows/pull_request.yml
vendored
@ -984,6 +984,75 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
#################################### INSTALL PACKAGES ######################################
|
||||
############################################################################################
|
||||
InstallPackagesTestRelease:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (amd64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
InstallPackagesTestAarch64:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (arm64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAl STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
|
69
.github/workflows/release_branches.yml
vendored
69
.github/workflows/release_branches.yml
vendored
@ -604,6 +604,75 @@ jobs:
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 mark_release_ready.py
|
||||
############################################################################################
|
||||
#################################### INSTALL PACKAGES ######################################
|
||||
############################################################################################
|
||||
InstallPackagesTestRelease:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (amd64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
InstallPackagesTestAarch64:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/test_install
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Install packages (arm64)
|
||||
REPO_COPY=${{runner.temp}}/test_install/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Test packages installation
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 install_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
##############################################################################################
|
||||
########################### FUNCTIONAl STATELESS TESTS #######################################
|
||||
##############################################################################################
|
||||
|
2
contrib/azure
vendored
2
contrib/azure
vendored
@ -1 +1 @@
|
||||
Subproject commit e4fcdfc81e337e589ce231a452dcc280fcbb3f99
|
||||
Subproject commit 096049bf24fffafcaccc132b9367694532716731
|
@ -21,5 +21,3 @@ RUN yarn config set registry https://registry.npmjs.org \
|
||||
COPY run.sh /run.sh
|
||||
|
||||
ENTRYPOINT ["/run.sh"]
|
||||
|
||||
CMD ["yarn", "build"]
|
||||
|
@ -25,7 +25,8 @@ done
|
||||
sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js
|
||||
|
||||
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
export CI=true
|
||||
export CI=true
|
||||
yarn install
|
||||
exec yarn build "$@"
|
||||
fi
|
||||
|
||||
|
@ -134,6 +134,14 @@
|
||||
"name": "clickhouse/keeper-jepsen-test",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/install/deb": {
|
||||
"name": "clickhouse/install-deb-test",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/test/install/rpm": {
|
||||
"name": "clickhouse/install-rpm-test",
|
||||
"dependent": []
|
||||
},
|
||||
"docker/docs/builder": {
|
||||
"name": "clickhouse/docs-builder",
|
||||
"dependent": [
|
||||
|
@ -231,6 +231,7 @@ function run_tests
|
||||
--hung-check
|
||||
--fast-tests-only
|
||||
--no-random-settings
|
||||
--no-random-merge-tree-settings
|
||||
--no-long
|
||||
--testname
|
||||
--shard
|
||||
|
64
docker/test/install/deb/Dockerfile
Normal file
64
docker/test/install/deb/Dockerfile
Normal file
@ -0,0 +1,64 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# The Dockerfile is nicely borrowed from
|
||||
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
|
||||
|
||||
ENV \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
LANG=C.UTF-8 \
|
||||
container=docker \
|
||||
init=/lib/systemd/systemd
|
||||
|
||||
# install systemd packages
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
systemd \
|
||||
&& \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists
|
||||
|
||||
# configure systemd
|
||||
# remove systemd 'wants' triggers
|
||||
# remove everything except tmpfiles setup in sysinit target
|
||||
# remove UTMP updater service
|
||||
# disable /tmp mount
|
||||
# fix missing BPF firewall support warning
|
||||
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
|
||||
RUN \
|
||||
find \
|
||||
/etc/systemd/system/*.wants/* \
|
||||
/lib/systemd/system/multi-user.target.wants/* \
|
||||
/lib/systemd/system/sockets.target.wants/*initctl* \
|
||||
! -type d \
|
||||
-delete && \
|
||||
find \
|
||||
/lib/systemd/system/sysinit.target.wants \
|
||||
! -type d \
|
||||
! -name '*systemd-tmpfiles-setup*' \
|
||||
-delete && \
|
||||
find \
|
||||
/lib/systemd \
|
||||
-name systemd-update-utmp-runlevel.service \
|
||||
-delete && \
|
||||
rm -vf /usr/share/systemd/tmp.mount && \
|
||||
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
|
||||
for MATCH in \
|
||||
plymouth-start.service \
|
||||
plymouth-quit-wait.service \
|
||||
syslog.socket \
|
||||
syslog.service \
|
||||
display-manager.service \
|
||||
systemd-sysusers.service \
|
||||
tmp.mount \
|
||||
systemd-udevd.service \
|
||||
; do \
|
||||
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
|
||||
done && \
|
||||
systemctl disable ondemand.service && \
|
||||
systemctl set-default multi-user.target
|
||||
|
||||
VOLUME ["/run", "/run/lock"]
|
||||
|
||||
STOPSIGNAL SIGRTMIN+3
|
||||
|
||||
ENTRYPOINT ["/lib/systemd/systemd"]
|
55
docker/test/install/rpm/Dockerfile
Normal file
55
docker/test/install/rpm/Dockerfile
Normal file
@ -0,0 +1,55 @@
|
||||
FROM centos:8
|
||||
|
||||
# The Dockerfile is nicely borrowed from
|
||||
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile
|
||||
|
||||
ENV \
|
||||
LANG=C.UTF-8 \
|
||||
container=docker \
|
||||
init=/lib/systemd/systemd
|
||||
|
||||
# configure systemd
|
||||
# remove systemd 'wants' triggers
|
||||
# remove everything except tmpfiles setup in sysinit target
|
||||
# remove UTMP updater service
|
||||
# disable /tmp mount
|
||||
# fix missing BPF firewall support warning
|
||||
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
|
||||
RUN \
|
||||
find \
|
||||
/etc/systemd/system/*.wants/ \
|
||||
/lib/systemd/system/multi-user.target.wants/ \
|
||||
/lib/systemd/system/local-fs.target.wants/ \
|
||||
/lib/systemd/system/sockets.target.wants/*initctl* \
|
||||
! -type d \
|
||||
-delete && \
|
||||
find \
|
||||
/lib/systemd/system/sysinit.target.wants \
|
||||
! -type d \
|
||||
! -name '*systemd-tmpfiles-setup*' \
|
||||
-delete && \
|
||||
find \
|
||||
/lib/systemd \
|
||||
-name systemd-update-utmp-runlevel.service \
|
||||
-delete && \
|
||||
rm -vf /usr/share/systemd/tmp.mount && \
|
||||
sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
|
||||
for MATCH in \
|
||||
plymouth-start.service \
|
||||
plymouth-quit-wait.service \
|
||||
syslog.socket \
|
||||
syslog.service \
|
||||
display-manager.service \
|
||||
systemd-sysusers.service \
|
||||
tmp.mount \
|
||||
systemd-udevd.service \
|
||||
; do \
|
||||
grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
|
||||
done && \
|
||||
systemctl set-default multi-user.target
|
||||
|
||||
VOLUME ["/run", "/run/lock"]
|
||||
|
||||
STOPSIGNAL SIGRTMIN+3
|
||||
|
||||
ENTRYPOINT ["/lib/systemd/systemd"]
|
226
docs/en/engines/table-engines/special/executable.md
Normal file
226
docs/en/engines/table-engines/special/executable.md
Normal file
@ -0,0 +1,226 @@
|
||||
---
|
||||
slug: /en/engines/table-engines/special/executable
|
||||
sidebar_position: 40
|
||||
sidebar_label: Executable
|
||||
---
|
||||
|
||||
# Executable and ExecutablePool Table Engines
|
||||
|
||||
The `Executable` and `ExecutablePool` table engines allow you to define a table whose rows are generated from a script that you define (by writing rows to **stdout**). The executable script is stored in the `user_scripts` directory and can read data from any source.
|
||||
|
||||
- `Executable` tables: the script is run on every query
|
||||
- `ExecutablePool` tables: maintains a pool of persistent processes, and takes processes from the pool for reads
|
||||
|
||||
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
|
||||
|
||||
## Creating an Executable Table
|
||||
|
||||
The `Executable` table engine requires two parameters: the name of the script and the format of the incoming data. You can optionally pass in one or more input queries:
|
||||
|
||||
```sql
|
||||
Executable(script_name, format, [input_query...])
|
||||
```
|
||||
|
||||
Here are the relevant settings for an `Executable` table:
|
||||
|
||||
- `send_chunk_header`
|
||||
- Description: Send the number of rows in each chunk before sending a chunk to process. This setting can help you write your script more efficiently, for example by preallocating resources
|
||||
- Default value: false
|
||||
- `command_termination_timeout`
|
||||
- Description: Command termination timeout in seconds
|
||||
- Default value: 10
|
||||
- `command_read_timeout`
|
||||
- Description: Timeout for reading data from command stdout in milliseconds
|
||||
- Default value: 10000
|
||||
- `command_write_timeout`
|
||||
- Description: Timeout for writing data to command stdin in milliseconds
|
||||
- Default value: 10000
|
||||
|
||||
|
||||
Let's look at an example. The following Python script is named `my_script.py` and is saved in the `user_scripts` folder. It reads in a number `i` and prints `i` random strings, with each string preceded by a number that is separated by a tab:
|
||||
|
||||
```python
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sys
|
||||
import string
|
||||
import random
|
||||
|
||||
def main():
|
||||
|
||||
# Read input value
|
||||
for number in sys.stdin:
|
||||
i = int(number)
|
||||
|
||||
# Generate some random rows
|
||||
for id in range(0, i):
|
||||
letters = string.ascii_letters
|
||||
random_string = ''.join(random.choices(letters ,k=10))
|
||||
print(str(id) + '\t' + random_string + '\n', end='')
|
||||
|
||||
# Flush results to stdout
|
||||
sys.stdout.flush()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE my_executable_table (
|
||||
x UInt32,
|
||||
y String
|
||||
)
|
||||
ENGINE = Executable('my_script.py', TabSeparated, (SELECT 10))
|
||||
```
|
||||
|
||||
Creating the table returns immediately and does not invoke the script. Querying `my_executable_table` causes the script to be invoked:
|
||||
|
||||
```sql
|
||||
SELECT * FROM my_executable_table
|
||||
```
|
||||
|
||||
```response
|
||||
┌─x─┬─y──────────┐
|
||||
│ 0 │ BsnKBsNGNH │
|
||||
│ 1 │ mgHfBCUrWM │
|
||||
│ 2 │ iDQAVhlygr │
|
||||
│ 3 │ uNGwDuXyCk │
|
||||
│ 4 │ GcFdQWvoLB │
|
||||
│ 5 │ UkciuuOTVO │
|
||||
│ 6 │ HoKeCdHkbs │
|
||||
│ 7 │ xRvySxqAcR │
|
||||
│ 8 │ LKbXPHpyDI │
|
||||
│ 9 │ zxogHTzEVV │
|
||||
└───┴────────────┘
|
||||
```
|
||||
|
||||
## Passing Query Results to a Script
|
||||
|
||||
Users of the Hacker News website leave comments. Python contains a natural language processing toolkit (`nltk`) with a `SentimentIntensityAnalyzer` for determining if comments are positive, negative, or neutral - including assigning a value between -1 (a very negative comment) and 1 (a very positive comment). Let's create an `Executable` table that computes the sentiment of Hacker News comments using `nltk`.
|
||||
|
||||
This example uses the `hackernews` table described [here](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/invertedindexes/#full-text-search-of-the-hacker-news-dataset). The `hackernews` table includes an `id` column of type `UInt64` and a `String` column named `comment`. Let's start by defining the `Executable` table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE sentiment (
|
||||
id UInt64,
|
||||
sentiment Float32
|
||||
)
|
||||
ENGINE = Executable(
|
||||
'sentiment.py',
|
||||
TabSeparated,
|
||||
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
|
||||
);
|
||||
```
|
||||
|
||||
Some comments about the `sentiment` table:
|
||||
|
||||
- The file `sentiment.py` is saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
|
||||
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
|
||||
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
|
||||
|
||||
Here is the definition of `sentiment.py`:
|
||||
|
||||
```python
|
||||
#!/usr/local/bin/python3.9
|
||||
|
||||
import sys
|
||||
import nltk
|
||||
from nltk.sentiment import SentimentIntensityAnalyzer
|
||||
|
||||
def main():
|
||||
sentiment_analyzer = SentimentIntensityAnalyzer()
|
||||
|
||||
while True:
|
||||
try:
|
||||
row = sys.stdin.readline()
|
||||
if row == '':
|
||||
break
|
||||
|
||||
split_line = row.split("\t")
|
||||
|
||||
id = str(split_line[0])
|
||||
comment = split_line[1]
|
||||
|
||||
score = sentiment_analyzer.polarity_scores(comment)['compound']
|
||||
print(id + '\t' + str(score) + '\n', end='')
|
||||
sys.stdout.flush()
|
||||
except BaseException as x:
|
||||
break
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
Some comments about our Python script:
|
||||
|
||||
- For this to work, you will need to run `nltk.downloader.download('vader_lexicon')`. This could have been placed in the script, but then it would have been downloaded every time a query was executed on the `sentiment` table - which is not efficient
|
||||
- Each value of `row` is going to be a row in the result set of `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20`
|
||||
- The incoming row is tab-separated, so we parse out the `id` and `comment` using the Python `split` function
|
||||
- The result of `polarity_scores` is a JSON object with a handful of values. We decided to just grab the `compound` value of this JSON object
|
||||
- Recall that the `sentiment` table in ClickHouse uses the `TabSeparated` format and contains two columns, so our `print` function separates those columns with a tab
|
||||
|
||||
Every time you write a query that selects rows from the `sentiment` table, the `SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20` query is executed and the result is passed to `sentiment.py`. Let's test it out:
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM sentiment
|
||||
```
|
||||
|
||||
The response looks like:
|
||||
|
||||
```response
|
||||
┌───────id─┬─sentiment─┐
|
||||
│ 7398199 │ 0.4404 │
|
||||
│ 21640317 │ 0.1779 │
|
||||
│ 21462000 │ 0 │
|
||||
│ 25168863 │ 0 │
|
||||
│ 25168978 │ -0.1531 │
|
||||
│ 25169359 │ 0 │
|
||||
│ 25169394 │ -0.9231 │
|
||||
│ 25169766 │ 0.4137 │
|
||||
│ 25172570 │ 0.7469 │
|
||||
│ 25173687 │ 0.6249 │
|
||||
│ 28291534 │ 0 │
|
||||
│ 28291669 │ -0.4767 │
|
||||
│ 28291731 │ 0 │
|
||||
│ 28291949 │ -0.4767 │
|
||||
│ 28292004 │ 0.3612 │
|
||||
│ 28292050 │ -0.296 │
|
||||
│ 28292322 │ 0 │
|
||||
│ 28295172 │ 0.7717 │
|
||||
│ 28295288 │ 0.4404 │
|
||||
│ 21465723 │ -0.6956 │
|
||||
└──────────┴───────────┘
|
||||
```
|
||||
|
||||
|
||||
## Creating an ExecutablePool Table
|
||||
|
||||
The syntax for `ExecutablePool` is similar to `Executable`, but there are a couple of relevant settings unique to an `ExecutablePool` table:
|
||||
|
||||
- `pool_size`
|
||||
- Description: Processes pool size. If size is 0, then there are no size restrictions
|
||||
- Default value: 16
|
||||
- `max_command_execution_time`
|
||||
- Description: Max command execution time in seconds
|
||||
- Default value: 10
|
||||
|
||||
We can easily convert the `sentiment` table above to use `ExecutablePool` instead of `Executable`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE sentiment_pooled (
|
||||
id UInt64,
|
||||
sentiment Float32
|
||||
)
|
||||
ENGINE = ExecutablePool(
|
||||
'sentiment.py',
|
||||
TabSeparated,
|
||||
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20000)
|
||||
)
|
||||
SETTINGS
|
||||
pool_size = 4;
|
||||
```
|
||||
|
||||
ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table.
|
@ -1279,7 +1279,9 @@ The following settings are available:
|
||||
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_records`: The maximum number of records `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
|
||||
Changed settings take effect immediately.
|
||||
|
||||
:::warning
|
||||
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
|
||||
@ -1292,7 +1294,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to
|
||||
<size>1073741824</size>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size>1048576</max_entry_size>
|
||||
<max_entry_records>30000000</max_entry_records>
|
||||
<max_entry_rows>30000000</max_entry_rows>
|
||||
</query_cache>
|
||||
```
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
|
||||
slug: /en/sql-reference/aggregate-functions/reference/sparkbar
|
||||
sidebar_position: 311
|
||||
sidebar_label: sparkbar
|
||||
---
|
||||
@ -7,9 +7,11 @@ sidebar_label: sparkbar
|
||||
# sparkbar
|
||||
|
||||
The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`.
|
||||
Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated.
|
||||
Negative repetitions are ignored.
|
||||
|
||||
|
||||
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
|
||||
If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end.
|
||||
Otherwise, values outside the interval are ignored.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -37,29 +39,24 @@ sparkbar(width[, min_x, max_x])(x, y)
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
|
||||
|
||||
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
|
||||
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
|
||||
|
||||
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
|
||||
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
|
||||
|
||||
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
|
||||
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
|
||||
|
||||
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
|
||||
┌─sparkbar(9)(event_date, cnt)─┐
|
||||
│ │
|
||||
│ ▁▅▄▃██▅ ▁ │
|
||||
│ │
|
||||
│ ▂▅▂▃▆█ ▂ │
|
||||
└──────────────────────────────┘
|
||||
|
||||
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
|
||||
│ │
|
||||
│▁▄▄▂▅▇█▁ │
|
||||
│ │
|
||||
│ ▂▅▂▃▇▆█ │
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
97
docs/en/sql-reference/table-functions/executable.md
Normal file
97
docs/en/sql-reference/table-functions/executable.md
Normal file
@ -0,0 +1,97 @@
|
||||
---
|
||||
slug: /en/engines/table-functions/executable
|
||||
sidebar_position: 55
|
||||
sidebar_label: executable
|
||||
keywords: [udf, user defined function, clickhouse, executable, table, function]
|
||||
---
|
||||
|
||||
# executable Table Function for UDFs
|
||||
|
||||
The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `user_scripts` directory and can read data from any source.
|
||||
|
||||
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
|
||||
|
||||
:::note
|
||||
A key advantage of the `executable` table function and the `Executable` table engine over ordinary UDF functions is that ordinary UDF functions cannot change the row count. For example, if the input is 100 rows, then the result must return 100 rows. When using the `executable` table function or `Executable` table engine, your script can make any data transformations you want, including complex aggregations.
|
||||
:::
|
||||
|
||||
## Syntax
|
||||
|
||||
The `executable` table function requires three parameters and accepts an optional list of input queries:
|
||||
|
||||
```sql
|
||||
executable(script_name, format, structure, [input_query...])
|
||||
```
|
||||
|
||||
- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
|
||||
- `format`: the format of the generated table
|
||||
- `structure`: the table schema of the generated table
|
||||
- `input_query`: an optional query (or collection of queries) whose results are passed to the script via **stdin**
|
||||
|
||||
:::note
|
||||
If you are going to invoke the same script repeatedly with the same input queries, consider using the [`Executable` table engine](../../engines/table-engines/special/executable.md).
|
||||
:::
|
||||
|
||||
The following Python script is named `generate_random.py` and is saved in the `user_scripts` folder. It reads in a number `i` and prints `i` random strings, with each string preceded by a number that is separated by a tab:
|
||||
|
||||
```python
|
||||
#!/usr/local/bin/python3.9
|
||||
|
||||
import sys
|
||||
import string
|
||||
import random
|
||||
|
||||
def main():
|
||||
|
||||
# Read input value
|
||||
for number in sys.stdin:
|
||||
i = int(number)
|
||||
|
||||
# Generate some random rows
|
||||
for id in range(0, i):
|
||||
letters = string.ascii_letters
|
||||
random_string = ''.join(random.choices(letters ,k=10))
|
||||
print(str(id) + '\t' + random_string + '\n', end='')
|
||||
|
||||
# Flush results to stdout
|
||||
sys.stdout.flush()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
Let's invoke the script and have it generate 10 random strings:
|
||||
|
||||
```sql
|
||||
SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', (SELECT 10))
|
||||
```
|
||||
|
||||
The response looks like:
|
||||
|
||||
```response
|
||||
┌─id─┬─random─────┐
|
||||
│ 0 │ xheXXCiSkH │
|
||||
│ 1 │ AqxvHAoTrl │
|
||||
│ 2 │ JYvPCEbIkY │
|
||||
│ 3 │ sWgnqJwGRm │
|
||||
│ 4 │ fTZGrjcLon │
|
||||
│ 5 │ ZQINGktPnd │
|
||||
│ 6 │ YFSvGGoezb │
|
||||
│ 7 │ QyMJJZOOia │
|
||||
│ 8 │ NfiyDDhmcI │
|
||||
│ 9 │ REJRdJpWrg │
|
||||
└────┴────────────┘
|
||||
```
|
||||
|
||||
## Passing Query Results to a Script
|
||||
|
||||
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
|
||||
|
||||
```sql
|
||||
SELECT * FROM executable(
|
||||
'sentiment.py',
|
||||
TabSeparated,
|
||||
'id UInt64, sentiment Float32',
|
||||
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
|
||||
);
|
||||
```
|
@ -1,14 +1,15 @@
|
||||
---
|
||||
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
|
||||
slug: /ru/sql-reference/aggregate-functions/reference/sparkbar
|
||||
sidebar_position: 311
|
||||
sidebar_label: sparkbar
|
||||
---
|
||||
|
||||
# sparkbar {#sparkbar}
|
||||
|
||||
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`.
|
||||
Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются.
|
||||
|
||||
Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`.
|
||||
Значения `x` вне указанного интервала игнорируются.
|
||||
|
||||
|
||||
**Синтаксис**
|
||||
@ -39,29 +40,23 @@ sparkbar(width[, min_x, max_x])(x, y)
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192;
|
||||
|
||||
INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11');
|
||||
CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date;
|
||||
|
||||
SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data;
|
||||
INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11');
|
||||
|
||||
SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data;
|
||||
SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
|
||||
|
||||
SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
|
||||
┌─sparkbar(9)(event_date, cnt)─┐
|
||||
│ │
|
||||
│ ▁▅▄▃██▅ ▁ │
|
||||
│ │
|
||||
│ ▂▅▂▃▆█ ▂ │
|
||||
└──────────────────────────────┘
|
||||
|
||||
┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐
|
||||
│ │
|
||||
│▁▄▄▂▅▇█▁ │
|
||||
│ │
|
||||
│ ▂▅▂▃▇▆█ │
|
||||
└──────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -102,7 +102,8 @@ done
|
||||
EOF
|
||||
chmod +x "$PKG_PATH/install/doinst.sh"
|
||||
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" >> "$PKG_PATH/install/doinst.sh"
|
||||
# we don't need debconf source in doinst in any case
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" | grep -v debconf/confmodule >> "$PKG_PATH/install/doinst.sh"
|
||||
fi
|
||||
rm -rf "$PKG_PATH/DEBIAN"
|
||||
if [ -f "/usr/bin/pigz" ]; then
|
||||
|
46
packages/clickhouse-keeper.postinstall
Normal file
46
packages/clickhouse-keeper.postinstall
Normal file
@ -0,0 +1,46 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
# set -x
|
||||
|
||||
PROGRAM=clickhouse-keeper
|
||||
KEEPER_USER=${KEEPER_USER:=clickhouse}
|
||||
KEEPER_GROUP=${KEEPER_GROUP:=clickhouse}
|
||||
# Please note that we don't support paths with whitespaces. This is rather ignorant.
|
||||
KEEPER_CONFDIR=${KEEPER_CONFDIR:=/etc/$PROGRAM}
|
||||
KEEPER_DATADIR=${KEEPER_DATADIR:=/var/lib/clickhouse}
|
||||
KEEPER_LOGDIR=${KEEPER_LOGDIR:=/var/log/$PROGRAM}
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
[ -f /etc/default/clickhouse-keeper ] && . /etc/default/clickhouse-keeper
|
||||
|
||||
if [ ! -f "/etc/debian_version" ]; then
|
||||
not_deb_os=1
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
if ! getent group "${KEEPER_GROUP}" > /dev/null 2>&1 ; then
|
||||
groupadd --system "${KEEPER_GROUP}"
|
||||
fi
|
||||
GID=$(getent group "${KEEPER_GROUP}" | cut -d: -f 3)
|
||||
if ! id "${KEEPER_USER}" > /dev/null 2>&1 ; then
|
||||
adduser --system --home /dev/null --no-create-home \
|
||||
--gid "${GID}" --shell /bin/false \
|
||||
"${KEEPER_USER}"
|
||||
fi
|
||||
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_CONFDIR}"
|
||||
chmod 0755 "${KEEPER_CONFDIR}"
|
||||
|
||||
if ! [ -d "${KEEPER_DATADIR}" ]; then
|
||||
mkdir -p "${KEEPER_DATADIR}"
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_DATADIR}"
|
||||
chmod 0700 "${KEEPER_DATADIR}"
|
||||
fi
|
||||
|
||||
if ! [ -d "${KEEPER_LOGDIR}" ]; then
|
||||
mkdir -p "${KEEPER_LOGDIR}"
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_LOGDIR}"
|
||||
chmod 0770 "${KEEPER_LOGDIR}"
|
||||
fi
|
||||
fi
|
||||
# vim: ts=4: sw=4: sts=4: expandtab
|
27
packages/clickhouse-keeper.service
Normal file
27
packages/clickhouse-keeper.service
Normal file
@ -0,0 +1,27 @@
|
||||
[Unit]
|
||||
Description=ClickHouse Keeper - zookeeper compatible distributed coordination server
|
||||
Requires=network-online.target
|
||||
# NOTE: that After/Wants=time-sync.target is not enough, you need to ensure
|
||||
# that the time was adjusted already, if you use systemd-timesyncd you are
|
||||
# safe, but if you use ntp or some other daemon, you should configure it
|
||||
# additionally.
|
||||
After=time-sync.target network-online.target
|
||||
Wants=time-sync.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
|
||||
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/%p
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
|
||||
WantedBy=multi-user.target
|
@ -30,6 +30,8 @@ contents:
|
||||
type: config|noreplace
|
||||
- src: root/usr/bin/clickhouse-keeper
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
- src: clickhouse-keeper.service
|
||||
dst: /lib/systemd/system/clickhouse-keeper.service
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
|
||||
@ -39,3 +41,6 @@ contents:
|
||||
dst: /usr/share/doc/clickhouse-keeper/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-keeper/README.md
|
||||
|
||||
scripts:
|
||||
postinstall: ./clickhouse-keeper.postinstall
|
||||
|
@ -11,8 +11,6 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
|
||||
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
|
||||
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
|
||||
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
|
@ -17,10 +17,10 @@ User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
RuntimeDirectory=clickhouse-server
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
|
||||
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
EnvironmentFile=-/etc/default/%p
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <cstdlib>
|
||||
#include <fcntl.h>
|
||||
#include <map>
|
||||
@ -538,24 +539,28 @@ void Client::connect()
|
||||
// Prints changed settings to stderr. Useful for debugging fuzzing failures.
|
||||
void Client::printChangedSettings() const
|
||||
{
|
||||
const auto & changes = global_context->getSettingsRef().changes();
|
||||
if (!changes.empty())
|
||||
auto print_changes = [](const auto & changes, std::string_view settings_name)
|
||||
{
|
||||
fmt::print(stderr, "Changed settings: ");
|
||||
for (size_t i = 0; i < changes.size(); ++i)
|
||||
if (!changes.empty())
|
||||
{
|
||||
if (i)
|
||||
fmt::print(stderr, "Changed {}: ", settings_name);
|
||||
for (size_t i = 0; i < changes.size(); ++i)
|
||||
{
|
||||
fmt::print(stderr, ", ");
|
||||
if (i)
|
||||
fmt::print(stderr, ", ");
|
||||
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
|
||||
}
|
||||
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
|
||||
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print(stderr, "No changed settings.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print(stderr, "No changed {}.\n", settings_name);
|
||||
}
|
||||
};
|
||||
|
||||
print_changes(global_context->getSettingsRef().changes(), "settings");
|
||||
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
|
||||
}
|
||||
|
||||
|
||||
@ -1387,6 +1392,8 @@ void Client::readArguments(
|
||||
}
|
||||
else if (arg == "--allow_repeated_settings")
|
||||
allow_repeated_settings = true;
|
||||
else if (arg == "--allow_merge_tree_settings")
|
||||
allow_merge_tree_settings = true;
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
|
@ -362,6 +362,7 @@ try
|
||||
else
|
||||
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
|
||||
|
||||
std::filesystem::create_directories(path);
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
const auto effective_user_id = geteuid();
|
||||
|
@ -82,9 +82,7 @@
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#if USE_BORINGSSL
|
||||
#include <Compression/CompressionCodecEncrypted.h>
|
||||
#endif
|
||||
#include <Server/HTTP/HTTPServerConnectionFactory.h>
|
||||
#include <Server/MySQLHandlerFactory.h>
|
||||
#include <Server/PostgreSQLHandlerFactory.h>
|
||||
@ -1348,9 +1346,8 @@ try
|
||||
|
||||
global_context->updateStorageConfiguration(*config);
|
||||
global_context->updateInterserverCredentials(*config);
|
||||
#if USE_BORINGSSL
|
||||
global_context->updateQueryCacheConfiguration(*config);
|
||||
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
|
||||
#endif
|
||||
#if USE_SSL
|
||||
CertificateReloader::instance().tryLoad(*config);
|
||||
#endif
|
||||
@ -1534,13 +1531,7 @@ try
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
|
||||
/// A cache for query results.
|
||||
size_t query_cache_size = config().getUInt64("query_cache.size", 1_GiB);
|
||||
if (query_cache_size)
|
||||
global_context->setQueryCache(
|
||||
query_cache_size,
|
||||
config().getUInt64("query_cache.max_entries", 1024),
|
||||
config().getUInt64("query_cache.max_entry_size", 1_MiB),
|
||||
config().getUInt64("query_cache.max_entry_records", 30'000'000));
|
||||
global_context->setQueryCache(config());
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
/// 128 MB
|
||||
@ -1564,10 +1555,8 @@ try
|
||||
global_context->getMergeTreeSettings().sanityCheck(background_pool_tasks);
|
||||
global_context->getReplicatedMergeTreeSettings().sanityCheck(background_pool_tasks);
|
||||
}
|
||||
#if USE_BORINGSSL
|
||||
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
|
||||
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
|
||||
#endif
|
||||
|
||||
SCOPE_EXIT({
|
||||
async_metrics.stop();
|
||||
|
@ -1516,7 +1516,7 @@
|
||||
<!-- <size>1073741824</size> -->
|
||||
<!-- <max_entries>1024</max_entries> -->
|
||||
<!-- <max_entry_size>1048576</max_entry_size> -->
|
||||
<!-- <max_entry_records>30000000</max_entry_records> -->
|
||||
<!-- <max_entry_rows>30000000</max_entry_rows> -->
|
||||
<!-- </query_cache> -->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
|
@ -10,22 +10,25 @@ mod ffi {
|
||||
}
|
||||
|
||||
struct Item {
|
||||
text: String,
|
||||
text_no_newlines: String,
|
||||
orig_text: String,
|
||||
}
|
||||
impl Item {
|
||||
fn new(text: String) -> Self {
|
||||
return Self{
|
||||
// Text that will be printed by skim, and will be used for matching.
|
||||
//
|
||||
// Text that will be shown should not contain new lines since in this case skim may
|
||||
// leave some symbols on the screen, and this looks odd.
|
||||
text: text.replace("\n", " "),
|
||||
text_no_newlines: text.replace("\n", " "),
|
||||
// This will be used when the match had been selected.
|
||||
orig_text: text,
|
||||
};
|
||||
}
|
||||
}
|
||||
impl SkimItem for Item {
|
||||
fn text(&self) -> Cow<str> {
|
||||
return Cow::Borrowed(&self.text);
|
||||
return Cow::Borrowed(&self.text_no_newlines);
|
||||
}
|
||||
|
||||
fn output(&self) -> Cow<str> {
|
||||
@ -44,6 +47,24 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
|
||||
.query(Some(prefix.to_str().unwrap()))
|
||||
.tac(true)
|
||||
.tiebreak(Some("-score".to_string()))
|
||||
// Exact mode performs better for SQL.
|
||||
//
|
||||
// Default fuzzy search is too smart for SQL, it even takes into account the case, which
|
||||
// should not be accounted (you don't want to type "SELECT" instead of "select" to find the
|
||||
// query).
|
||||
//
|
||||
// Exact matching seems to be a better algorithm for SQL; it is not 100% exact, it splits by space,
|
||||
// and applies a separate matcher for each word.
|
||||
// Note, that if you think that "space is not enough" as the delimiter, then you should
|
||||
// first know that this is the delimiter only for the input query, so to match
|
||||
// "system.query_log" you can use "sy qu log"
|
||||
// Also it should be more common for users who did not know how to use fuzzy search.
|
||||
// (also you can disable exact mode by prepending "'" char).
|
||||
//
|
||||
// Also it ignores the case correctly, i.e. it does not have penalty for case mismatch,
|
||||
// like fuzzy algorithms (take a look at SkimScoreConfig::penalty_case_mismatch).
|
||||
.exact(true)
|
||||
.case(CaseMatching::Ignore)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
|
@ -247,15 +247,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
|
||||
if (adam_rhs.average_gradient.empty())
|
||||
return;
|
||||
|
||||
if (average_gradient.empty())
|
||||
{
|
||||
if (!average_squared_gradient.empty() ||
|
||||
adam_rhs.average_gradient.size() != adam_rhs.average_squared_gradient.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
|
||||
|
||||
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
|
||||
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
|
||||
}
|
||||
average_gradient.resize(adam_rhs.average_gradient.size(), Float64{0.0});
|
||||
average_squared_gradient.resize(adam_rhs.average_squared_gradient.size(), Float64{0.0});
|
||||
|
||||
for (size_t i = 0; i < average_gradient.size(); ++i)
|
||||
{
|
||||
@ -268,14 +261,8 @@ void Adam::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
|
||||
|
||||
void Adam::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
|
||||
{
|
||||
if (average_gradient.empty())
|
||||
{
|
||||
if (!average_squared_gradient.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Average_gradient and average_squared_gradient must have same size");
|
||||
|
||||
average_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
}
|
||||
average_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
average_squared_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
|
||||
for (size_t i = 0; i != average_gradient.size(); ++i)
|
||||
{
|
||||
@ -328,8 +315,7 @@ void Nesterov::write(WriteBuffer & buf) const
|
||||
void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac)
|
||||
{
|
||||
const auto & nesterov_rhs = static_cast<const Nesterov &>(rhs);
|
||||
if (accumulated_gradient.empty())
|
||||
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
|
||||
accumulated_gradient.resize(nesterov_rhs.accumulated_gradient.size(), Float64{0.0});
|
||||
|
||||
for (size_t i = 0; i < accumulated_gradient.size(); ++i)
|
||||
{
|
||||
@ -339,10 +325,7 @@ void Nesterov::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
|
||||
|
||||
void Nesterov::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
|
||||
{
|
||||
if (accumulated_gradient.empty())
|
||||
{
|
||||
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
}
|
||||
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
@ -402,10 +385,7 @@ void Momentum::merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac
|
||||
void Momentum::update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient)
|
||||
{
|
||||
/// batch_size is already checked to be greater than 0
|
||||
if (accumulated_gradient.empty())
|
||||
{
|
||||
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
}
|
||||
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
|
||||
|
||||
for (size_t i = 0; i < batch_gradient.size(); ++i)
|
||||
{
|
||||
|
@ -149,9 +149,11 @@ public:
|
||||
class Momentum : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Momentum() = default;
|
||||
|
||||
explicit Momentum(Float64 alpha_) : alpha(alpha_) {}
|
||||
explicit Momentum(size_t num_params, Float64 alpha_ = 0.1) : alpha(alpha_)
|
||||
{
|
||||
accumulated_gradient.resize(num_params + 1, 0);
|
||||
}
|
||||
|
||||
void update(UInt64 batch_size, std::vector<Float64> & weights, Float64 & bias, Float64 learning_rate, const std::vector<Float64> & batch_gradient) override;
|
||||
|
||||
@ -170,9 +172,10 @@ private:
|
||||
class Nesterov : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Nesterov() = default;
|
||||
|
||||
explicit Nesterov(Float64 alpha_) : alpha(alpha_) {}
|
||||
explicit Nesterov(size_t num_params, Float64 alpha_ = 0.9) : alpha(alpha_)
|
||||
{
|
||||
accumulated_gradient.resize(num_params + 1, 0);
|
||||
}
|
||||
|
||||
void addToBatch(
|
||||
std::vector<Float64> & batch_gradient,
|
||||
@ -201,10 +204,14 @@ private:
|
||||
class Adam : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Adam()
|
||||
Adam(size_t num_params)
|
||||
{
|
||||
beta1_powered = beta1;
|
||||
beta2_powered = beta2;
|
||||
|
||||
|
||||
average_gradient.resize(num_params + 1, 0);
|
||||
average_squared_gradient.resize(num_params + 1, 0);
|
||||
}
|
||||
|
||||
void addToBatch(
|
||||
@ -338,11 +345,11 @@ public:
|
||||
if (weights_updater_name == "SGD")
|
||||
new_weights_updater = std::make_shared<StochasticGradientDescent>();
|
||||
else if (weights_updater_name == "Momentum")
|
||||
new_weights_updater = std::make_shared<Momentum>();
|
||||
new_weights_updater = std::make_shared<Momentum>(param_num);
|
||||
else if (weights_updater_name == "Nesterov")
|
||||
new_weights_updater = std::make_shared<Nesterov>();
|
||||
new_weights_updater = std::make_shared<Nesterov>(param_num);
|
||||
else if (weights_updater_name == "Adam")
|
||||
new_weights_updater = std::make_shared<Adam>();
|
||||
new_weights_updater = std::make_shared<Adam>(param_num);
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal name of weights updater (should have been checked earlier)");
|
||||
|
||||
|
@ -50,11 +50,13 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
|
||||
assertBinary(name, arguments);
|
||||
|
||||
if (params.size() != 1 && params.size() != 3)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The number of params does not match for aggregate function {}", name);
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"The number of params does not match for aggregate function '{}', expected 1 or 3, got {}", name, params.size());
|
||||
|
||||
if (params.size() == 3)
|
||||
{
|
||||
if (params.at(1).getType() != arguments[0]->getDefault().getType() || params.at(2).getType() != arguments[0]->getDefault().getType())
|
||||
if (params.at(1).getType() != arguments[0]->getDefault().getType() ||
|
||||
params.at(2).getType() != arguments[0]->getDefault().getType())
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The second and third parameters are not the same type as the first arguments for aggregate function {}", name);
|
||||
@ -63,7 +65,6 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c
|
||||
return createAggregateFunctionSparkbarImpl(name, *arguments[0], *arguments[1], arguments, params);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory)
|
||||
|
@ -18,10 +18,15 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
template<typename X, typename Y>
|
||||
struct AggregateFunctionSparkbarData
|
||||
{
|
||||
|
||||
/// TODO: calculate histogram instead of storing all points
|
||||
using Points = HashMap<X, Y>;
|
||||
Points points;
|
||||
|
||||
@ -31,20 +36,26 @@ struct AggregateFunctionSparkbarData
|
||||
Y min_y = std::numeric_limits<Y>::max();
|
||||
Y max_y = std::numeric_limits<Y>::lowest();
|
||||
|
||||
void insert(const X & x, const Y & y)
|
||||
Y insert(const X & x, const Y & y)
|
||||
{
|
||||
auto result = points.insert({x, y});
|
||||
if (!result.second)
|
||||
result.first->getMapped() += y;
|
||||
if (isNaN(y) || y <= 0)
|
||||
return 0;
|
||||
|
||||
auto [it, inserted] = points.insert({x, y});
|
||||
if (!inserted)
|
||||
it->getMapped() += y;
|
||||
return it->getMapped();
|
||||
}
|
||||
|
||||
void add(X x, Y y)
|
||||
{
|
||||
insert(x, y);
|
||||
auto new_y = insert(x, y);
|
||||
|
||||
min_x = std::min(x, min_x);
|
||||
max_x = std::max(x, max_x);
|
||||
|
||||
min_y = std::min(y, min_y);
|
||||
max_y = std::max(y, max_y);
|
||||
max_y = std::max(new_y, max_y);
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionSparkbarData & other)
|
||||
@ -53,10 +64,14 @@ struct AggregateFunctionSparkbarData
|
||||
return;
|
||||
|
||||
for (auto & point : other.points)
|
||||
insert(point.getKey(), point.getMapped());
|
||||
{
|
||||
auto new_y = insert(point.getKey(), point.getMapped());
|
||||
max_y = std::max(new_y, max_y);
|
||||
}
|
||||
|
||||
min_x = std::min(other.min_x, min_x);
|
||||
max_x = std::max(other.max_x, max_x);
|
||||
|
||||
min_y = std::min(other.min_y, min_y);
|
||||
max_y = std::max(other.max_y, max_y);
|
||||
}
|
||||
@ -85,7 +100,6 @@ struct AggregateFunctionSparkbarData
|
||||
size_t size;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
/// TODO Protection against huge size
|
||||
X x;
|
||||
Y y;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
@ -95,7 +109,6 @@ struct AggregateFunctionSparkbarData
|
||||
insert(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename X, typename Y>
|
||||
@ -104,16 +117,17 @@ class AggregateFunctionSparkbar final
|
||||
{
|
||||
|
||||
private:
|
||||
size_t width;
|
||||
X min_x;
|
||||
X max_x;
|
||||
bool specified_min_max_x;
|
||||
const size_t width = 0;
|
||||
|
||||
template <class T>
|
||||
size_t updateFrame(ColumnString::Chars & frame, const T value) const
|
||||
/// Range for x specified in parameters.
|
||||
const bool is_specified_range_x = false;
|
||||
const X begin_x = std::numeric_limits<X>::min();
|
||||
const X end_x = std::numeric_limits<X>::max();
|
||||
|
||||
size_t updateFrame(ColumnString::Chars & frame, Y value) const
|
||||
{
|
||||
static constexpr std::array<std::string_view, 9> bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"};
|
||||
const auto & bar = (isNaN(value) || value > 8 || value < 1) ? bars[0] : bars[static_cast<UInt8>(value)];
|
||||
const auto & bar = (isNaN(value) || value < 1 || 8 < value) ? bars[0] : bars[static_cast<UInt8>(value)];
|
||||
frame.insert(bar.begin(), bar.end());
|
||||
return bar.size();
|
||||
}
|
||||
@ -122,161 +136,108 @@ private:
|
||||
* The minimum value of y is rendered as the lowest height "▁",
|
||||
* the maximum value of y is rendered as the highest height "█", and the middle value will be rendered proportionally.
|
||||
* If a bucket has no y value, it will be rendered as " ".
|
||||
* If the actual number of buckets is greater than the specified bucket, it will be compressed by width.
|
||||
* For example, there are actually 11 buckets, specify 10 buckets, and divide the 11 buckets as follows (11/10):
|
||||
* 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11.
|
||||
* The y value of the first bucket will be calculated as follows:
|
||||
* the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket.
|
||||
* The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on.
|
||||
*/
|
||||
void render(ColumnString & to_column, const AggregateFunctionSparkbarData<X, Y> & data) const
|
||||
{
|
||||
size_t sz = 0;
|
||||
auto & values = to_column.getChars();
|
||||
auto & offsets = to_column.getOffsets();
|
||||
auto update_column = [&] ()
|
||||
|
||||
if (data.points.empty())
|
||||
{
|
||||
values.push_back('\0');
|
||||
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
auto from_x = is_specified_range_x ? begin_x : data.min_x;
|
||||
auto to_x = is_specified_range_x ? end_x : data.max_x;
|
||||
|
||||
if (from_x >= to_x)
|
||||
{
|
||||
size_t sz = updateFrame(values, 8);
|
||||
values.push_back('\0');
|
||||
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
|
||||
};
|
||||
|
||||
if (data.points.empty() || !width)
|
||||
return update_column();
|
||||
|
||||
size_t diff_x;
|
||||
X min_x_local;
|
||||
if (specified_min_max_x)
|
||||
{
|
||||
diff_x = max_x - min_x;
|
||||
min_x_local = min_x;
|
||||
}
|
||||
else
|
||||
{
|
||||
diff_x = data.max_x - data.min_x;
|
||||
min_x_local = data.min_x;
|
||||
return;
|
||||
}
|
||||
|
||||
if ((diff_x + 1) <= width)
|
||||
{
|
||||
Y min_y = data.min_y;
|
||||
Y max_y = data.max_y;
|
||||
Float64 diff_y = max_y - min_y;
|
||||
PaddedPODArray<Y> histogram(width, 0);
|
||||
PaddedPODArray<UInt64> fhistogram(width, 0);
|
||||
|
||||
if (diff_y != 0.0)
|
||||
for (const auto & point : data.points)
|
||||
{
|
||||
if (point.getKey() < from_x || to_x < point.getKey())
|
||||
continue;
|
||||
|
||||
X delta = to_x - from_x;
|
||||
if (delta < std::numeric_limits<X>::max())
|
||||
delta = delta + 1;
|
||||
|
||||
X value = point.getKey() - from_x;
|
||||
Float64 w = histogram.size();
|
||||
size_t index = std::min<size_t>(static_cast<size_t>(w / delta * value), histogram.size() - 1);
|
||||
|
||||
if (std::numeric_limits<Y>::max() - histogram[index] > point.getMapped())
|
||||
{
|
||||
for (size_t i = 0; i <= diff_x; ++i)
|
||||
{
|
||||
auto it = data.points.find(static_cast<X>(min_x_local + i));
|
||||
bool found = it != data.points.end();
|
||||
sz += updateFrame(values, found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
|
||||
}
|
||||
histogram[index] += point.getMapped();
|
||||
fhistogram[index] += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i <= diff_x; ++i)
|
||||
sz += updateFrame(values, data.points.has(min_x_local + static_cast<X>(i)) ? 1 : 0);
|
||||
/// In case of overflow, just saturate
|
||||
histogram[index] = std::numeric_limits<Y>::max();
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
for (size_t i = 0; i < histogram.size(); ++i)
|
||||
{
|
||||
// begin reshapes to width buckets
|
||||
Float64 multiple_d = (diff_x + 1) / static_cast<Float64>(width);
|
||||
|
||||
std::optional<Float64> min_y;
|
||||
std::optional<Float64> max_y;
|
||||
|
||||
std::optional<Float64> new_y;
|
||||
std::vector<std::optional<Float64>> new_points;
|
||||
new_points.reserve(width);
|
||||
|
||||
std::pair<size_t, Float64> bound{0, 0.0};
|
||||
size_t cur_bucket_num = 0;
|
||||
// upper bound for bucket
|
||||
auto upper_bound = [&](size_t bucket_num)
|
||||
{
|
||||
bound.second = (bucket_num + 1) * multiple_d;
|
||||
bound.first = static_cast<size_t>(std::floor(bound.second));
|
||||
};
|
||||
upper_bound(cur_bucket_num);
|
||||
for (size_t i = 0; i <= (diff_x + 1); ++i)
|
||||
{
|
||||
if (i == bound.first) // is bound
|
||||
{
|
||||
Float64 proportion = bound.second - bound.first;
|
||||
auto it = data.points.find(min_x_local + static_cast<X>(i));
|
||||
bool found = (it != data.points.end());
|
||||
if (found && proportion > 0)
|
||||
new_y = new_y.value_or(0) + it->getMapped() * proportion;
|
||||
|
||||
if (new_y)
|
||||
{
|
||||
Float64 avg_y = new_y.value() / multiple_d;
|
||||
|
||||
new_points.emplace_back(avg_y);
|
||||
// If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it.
|
||||
if (!min_y || avg_y < min_y)
|
||||
min_y = avg_y;
|
||||
if (!max_y || avg_y > max_y)
|
||||
max_y = avg_y;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_points.emplace_back();
|
||||
}
|
||||
|
||||
// next bucket
|
||||
new_y = found ? ((1 - proportion) * it->getMapped()) : std::optional<Float64>();
|
||||
upper_bound(++cur_bucket_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto it = data.points.find(min_x_local + static_cast<X>(i));
|
||||
if (it != data.points.end())
|
||||
new_y = new_y.value_or(0) + it->getMapped();
|
||||
}
|
||||
}
|
||||
|
||||
if (!min_y || !max_y) // No value is set
|
||||
return update_column();
|
||||
|
||||
Float64 diff_y = max_y.value() - min_y.value();
|
||||
|
||||
auto update_frame = [&] (const std::optional<Float64> & point_y)
|
||||
{
|
||||
sz += updateFrame(values, point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
|
||||
};
|
||||
auto update_frame_for_constant = [&] (const std::optional<Float64> & point_y)
|
||||
{
|
||||
sz += updateFrame(values, point_y ? 1 : 0);
|
||||
};
|
||||
|
||||
if (diff_y != 0.0)
|
||||
std::for_each(new_points.begin(), new_points.end(), update_frame);
|
||||
else
|
||||
std::for_each(new_points.begin(), new_points.end(), update_frame_for_constant);
|
||||
if (fhistogram[i] > 0)
|
||||
histogram[i] /= fhistogram[i];
|
||||
}
|
||||
update_column();
|
||||
|
||||
Y y_max = 0;
|
||||
for (auto & y : histogram)
|
||||
{
|
||||
if (isNaN(y) || y <= 0)
|
||||
continue;
|
||||
y_max = std::max(y_max, y);
|
||||
}
|
||||
|
||||
if (y_max == 0)
|
||||
{
|
||||
values.push_back('\0');
|
||||
offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1);
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto & y : histogram)
|
||||
{
|
||||
if (isNaN(y) || y <= 0)
|
||||
y = 0;
|
||||
else
|
||||
y = y * 7 / y_max + 1;
|
||||
}
|
||||
|
||||
size_t sz = 0;
|
||||
for (const auto & y : histogram)
|
||||
sz += updateFrame(values, y);
|
||||
|
||||
values.push_back('\0');
|
||||
offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1);
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(
|
||||
arguments, params, std::make_shared<DataTypeString>())
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(arguments, params, std::make_shared<DataTypeString>())
|
||||
, width(params.empty() ? 0 : params.at(0).safeGet<UInt64>())
|
||||
, is_specified_range_x(params.size() >= 3)
|
||||
, begin_x(is_specified_range_x ? static_cast<X>(params.at(1).safeGet<X>()) : std::numeric_limits<X>::min())
|
||||
, end_x(is_specified_range_x ? static_cast<X>(params.at(2).safeGet<X>()) : std::numeric_limits<X>::max())
|
||||
{
|
||||
width = params.at(0).safeGet<UInt64>();
|
||||
if (params.size() == 3)
|
||||
{
|
||||
specified_min_max_x = true;
|
||||
min_x = static_cast<X>(params.at(1).safeGet<X>());
|
||||
max_x = static_cast<X>(params.at(2).safeGet<X>());
|
||||
}
|
||||
else
|
||||
{
|
||||
specified_min_max_x = false;
|
||||
min_x = std::numeric_limits<X>::min();
|
||||
max_x = std::numeric_limits<X>::max();
|
||||
}
|
||||
if (width < 2 || 1024 < width)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter width must be in range [2, 1024]");
|
||||
|
||||
if (begin_x >= end_x)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter `min_x` must be less than `max_x`");
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
@ -287,7 +248,7 @@ public:
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override
|
||||
{
|
||||
X x = assert_cast<const ColumnVector<X> *>(columns[0])->getData()[row_num];
|
||||
if (min_x <= x && x <= max_x)
|
||||
if (begin_x <= x && x <= end_x)
|
||||
{
|
||||
Y y = assert_cast<const ColumnVector<Y> *>(columns[1])->getData()[row_num];
|
||||
this->data(place).add(x, y);
|
||||
|
@ -953,7 +953,12 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry)
|
||||
{
|
||||
LOG_TRACE(log, "Will copy file {}", adjusted_path);
|
||||
|
||||
if (!num_entries)
|
||||
bool has_entries = false;
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
has_entries = num_entries > 0;
|
||||
}
|
||||
if (!has_entries)
|
||||
checkLockFile(true);
|
||||
|
||||
if (use_archives)
|
||||
|
@ -226,13 +226,7 @@ add_object_library(clickhouse_access Access)
|
||||
add_object_library(clickhouse_backups Backups)
|
||||
add_object_library(clickhouse_core Core)
|
||||
add_object_library(clickhouse_core_mysql Core/MySQL)
|
||||
if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
|
||||
add_headers_and_sources(dbms Compression)
|
||||
list(REMOVE_ITEM dbms_headers Compression/CompressionCodecEncrypted.h)
|
||||
list(REMOVE_ITEM dbms_sources Compression/CompressionCodecEncrypted.cpp)
|
||||
else ()
|
||||
add_object_library(clickhouse_compression Compression)
|
||||
endif ()
|
||||
add_object_library(clickhouse_compression Compression)
|
||||
add_object_library(clickhouse_querypipeline QueryPipeline)
|
||||
add_object_library(clickhouse_datatypes DataTypes)
|
||||
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTColumnDeclaration.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/Kusto/ParserKQLStatement.h>
|
||||
|
||||
#include <Processors/Formats/Impl/NullFormat.h>
|
||||
@ -816,17 +817,15 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
|
||||
|
||||
void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query)
|
||||
{
|
||||
if (fake_drop)
|
||||
{
|
||||
if (parsed_query->as<ASTDropQuery>())
|
||||
return;
|
||||
}
|
||||
if (fake_drop && parsed_query->as<ASTDropQuery>())
|
||||
return;
|
||||
|
||||
auto query = query_to_execute;
|
||||
|
||||
/// Rewrite query only when we have query parameters.
|
||||
/// Note that if query is rewritten, comments in query are lost.
|
||||
/// But the user often wants to see comments in server logs, query log, processlist, etc.
|
||||
/// For recent versions of the server query parameters will be transferred by network and applied on the server side.
|
||||
auto query = query_to_execute;
|
||||
if (!query_parameters.empty()
|
||||
&& connection->getServerRevision(connection_parameters.timeouts) < DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS)
|
||||
{
|
||||
@ -838,6 +837,22 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
|
||||
query = serializeAST(*parsed_query);
|
||||
}
|
||||
|
||||
if (allow_merge_tree_settings && parsed_query->as<ASTCreateQuery>())
|
||||
{
|
||||
/// Rewrite query if new settings were added.
|
||||
if (addMergeTreeSettings(*parsed_query->as<ASTCreateQuery>()))
|
||||
{
|
||||
/// Replace query parameters because AST cannot be serialized otherwise.
|
||||
if (!query_parameters.empty())
|
||||
{
|
||||
ReplaceQueryParameterVisitor visitor(query_parameters);
|
||||
visitor.visit(parsed_query);
|
||||
}
|
||||
|
||||
query = serializeAST(*parsed_query);
|
||||
}
|
||||
}
|
||||
|
||||
int retries_left = 10;
|
||||
while (retries_left)
|
||||
{
|
||||
@ -2065,6 +2080,41 @@ void ClientBase::initQueryIdFormats()
|
||||
}
|
||||
|
||||
|
||||
/// Appends the merge tree settings reported as changed by `cmd_merge_tree_settings`
/// to the SETTINGS clause of the given CREATE query, skipping names that the clause already contains.
/// Returns true if at least one setting was appended, false otherwise.
bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create)
|
||||
{
|
||||
/// Leave the query untouched if it is an ATTACH, has no storage clause, the storage clause is not
/// an extended definition, no engine is given, or the engine name does not contain "MergeTree".
if (ast_create.attach
|
||||
|| !ast_create.storage
|
||||
|| !ast_create.storage->isExtendedStorageDefinition()
|
||||
|| !ast_create.storage->engine
|
||||
|| ast_create.storage->engine->name.find("MergeTree") == std::string::npos)
|
||||
return false;
|
||||
|
||||
auto all_changed = cmd_merge_tree_settings.allChanged();
|
||||
/// An empty range means there is nothing to add.
if (all_changed.begin() == all_changed.end())
|
||||
return false;
|
||||
|
||||
/// The query has no SETTINGS clause yet: attach an empty, non-standalone one to the storage definition.
if (!ast_create.storage->settings)
|
||||
{
|
||||
auto settings_ast = std::make_shared<ASTSetQuery>();
|
||||
settings_ast->is_standalone = false;
|
||||
ast_create.storage->set(ast_create.storage->settings, settings_ast);
|
||||
}
|
||||
|
||||
auto & storage_settings = *ast_create.storage->settings;
|
||||
bool added_new_setting = false;
|
||||
|
||||
for (const auto & setting : all_changed)
|
||||
{
|
||||
/// A setting already present in the query's SETTINGS clause is kept as written there.
if (!storage_settings.changes.tryGet(setting.getName()))
|
||||
{
|
||||
storage_settings.changes.emplace_back(setting.getName(), setting.getValue());
|
||||
added_new_setting = true;
|
||||
}
|
||||
}
|
||||
|
||||
return added_new_setting;
|
||||
}
|
||||
|
||||
void ClientBase::runInteractive()
|
||||
{
|
||||
if (config().has("query_id"))
|
||||
@ -2302,6 +2352,30 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description,
|
||||
cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value());
|
||||
else
|
||||
cmd_settings.addProgramOptions(options_description.main_description.value());
|
||||
|
||||
if (allow_merge_tree_settings)
|
||||
{
|
||||
/// Add merge tree settings manually, because names of some settings
|
||||
/// may clash. Query settings have higher priority and we just
|
||||
/// skip ambiguous merge tree settings.
|
||||
auto & main_options = options_description.main_description.value();
|
||||
|
||||
NameSet main_option_names;
|
||||
for (const auto & option : main_options.options())
|
||||
main_option_names.insert(option->long_name());
|
||||
|
||||
for (const auto & setting : cmd_merge_tree_settings.all())
|
||||
{
|
||||
if (main_option_names.contains(setting.getName()))
|
||||
continue;
|
||||
|
||||
if (allow_repeated_settings)
|
||||
cmd_merge_tree_settings.addProgramOptionAsMultitoken(main_options, setting);
|
||||
else
|
||||
cmd_merge_tree_settings.addProgramOption(main_options, setting);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse main commandline options.
|
||||
auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered();
|
||||
po::parsed_options parsed = parser.run();
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/NamePrompter.h"
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Common/ProgressIndication.h>
|
||||
#include <Common/InterruptListener.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
@ -14,6 +15,7 @@
|
||||
#include <boost/program_options.hpp>
|
||||
#include <Storages/StorageFile.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
|
||||
|
||||
namespace po = boost::program_options;
|
||||
@ -164,6 +166,7 @@ private:
|
||||
void updateSuggest(const ASTPtr & ast);
|
||||
|
||||
void initQueryIdFormats();
|
||||
bool addMergeTreeSettings(ASTCreateQuery & ast_create);
|
||||
|
||||
protected:
|
||||
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
|
||||
@ -212,6 +215,7 @@ protected:
|
||||
|
||||
/// Settings specified via command line args
|
||||
Settings cmd_settings;
|
||||
MergeTreeSettings cmd_merge_tree_settings;
|
||||
|
||||
/// thread status should be destructed before shared context because it relies on process list.
|
||||
std::optional<ThreadStatus> thread_status;
|
||||
@ -298,6 +302,7 @@ protected:
|
||||
std::vector<HostAndPort> hosts_and_ports{};
|
||||
|
||||
bool allow_repeated_settings = false;
|
||||
bool allow_merge_tree_settings = false;
|
||||
|
||||
bool cancelled = false;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
@ -549,19 +550,48 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
|
||||
|
||||
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
|
||||
{
|
||||
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt8 *>(data.get()))
|
||||
return filterNumber<UInt8>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt16 *>(data.get()))
|
||||
return filterNumber<UInt16>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt32 *>(data.get()))
|
||||
return filterNumber<UInt32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt64 *>(data.get()))
|
||||
return filterNumber<UInt64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt128 *>(data.get()))
|
||||
return filterNumber<UInt128>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnUInt256 *>(data.get()))
|
||||
return filterNumber<UInt256>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt8 *>(data.get()))
|
||||
return filterNumber<Int8>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt16 *>(data.get()))
|
||||
return filterNumber<Int16>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt32 *>(data.get()))
|
||||
return filterNumber<Int32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt64 *>(data.get()))
|
||||
return filterNumber<Int64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt128 *>(data.get()))
|
||||
return filterNumber<Int128>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnInt256 *>(data.get()))
|
||||
return filterNumber<Int256>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnFloat32 *>(data.get()))
|
||||
return filterNumber<Float32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnFloat64 *>(data.get()))
|
||||
return filterNumber<Float64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal32> *>(data.get()))
|
||||
return filterNumber<Decimal32>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal64> *>(data.get()))
|
||||
return filterNumber<Decimal64>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal128> *>(data.get()))
|
||||
return filterNumber<Decimal128>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal256> *>(data.get()))
|
||||
return filterNumber<Decimal256>(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnString *>(data.get()))
|
||||
return filterString(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnTuple *>(data.get()))
|
||||
return filterTuple(filt, result_size_hint);
|
||||
if (typeid_cast<const ColumnNullable *>(data.get()))
|
||||
return filterNullable(filt, result_size_hint);
|
||||
return filterGeneric(filt, result_size_hint);
|
||||
}
|
||||
|
||||
@ -597,15 +627,17 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted)
|
||||
template <typename T>
|
||||
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<T>;
|
||||
|
||||
if (getOffsets().empty())
|
||||
return ColumnArray::create(data);
|
||||
|
||||
auto res = ColumnArray::create(data->cloneEmpty());
|
||||
|
||||
auto & res_elems = assert_cast<ColumnVector<T> &>(res->getData()).getData();
|
||||
auto & res_elems = assert_cast<ColVecType &>(res->getData()).getData();
|
||||
Offsets & res_offsets = res->getOffsets();
|
||||
|
||||
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
|
||||
filterArraysImpl<T>(assert_cast<const ColVecType &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -932,20 +964,50 @@ ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
|
||||
if (replicate_offsets.empty())
|
||||
return cloneEmpty();
|
||||
|
||||
if (typeid_cast<const ColumnUInt8 *>(data.get())) return replicateNumber<UInt8>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt16 *>(data.get())) return replicateNumber<UInt16>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt32 *>(data.get())) return replicateNumber<UInt32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt64 *>(data.get())) return replicateNumber<UInt64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt8 *>(data.get())) return replicateNumber<Int8>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt16 *>(data.get())) return replicateNumber<Int16>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt32 *>(data.get())) return replicateNumber<Int32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt64 *>(data.get())) return replicateNumber<Int64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnFloat32 *>(data.get())) return replicateNumber<Float32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnFloat64 *>(data.get())) return replicateNumber<Float64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnString *>(data.get())) return replicateString(replicate_offsets);
|
||||
if (typeid_cast<const ColumnConst *>(data.get())) return replicateConst(replicate_offsets);
|
||||
if (typeid_cast<const ColumnNullable *>(data.get())) return replicateNullable(replicate_offsets);
|
||||
if (typeid_cast<const ColumnTuple *>(data.get())) return replicateTuple(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt8 *>(data.get()))
|
||||
return replicateNumber<UInt8>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt16 *>(data.get()))
|
||||
return replicateNumber<UInt16>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt32 *>(data.get()))
|
||||
return replicateNumber<UInt32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt64 *>(data.get()))
|
||||
return replicateNumber<UInt64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt128 *>(data.get()))
|
||||
return replicateNumber<UInt128>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnUInt256 *>(data.get()))
|
||||
return replicateNumber<UInt256>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt8 *>(data.get()))
|
||||
return replicateNumber<Int8>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt16 *>(data.get()))
|
||||
return replicateNumber<Int16>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt32 *>(data.get()))
|
||||
return replicateNumber<Int32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt64 *>(data.get()))
|
||||
return replicateNumber<Int64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt128 *>(data.get()))
|
||||
return replicateNumber<Int128>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnInt256 *>(data.get()))
|
||||
return replicateNumber<Int256>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnFloat32 *>(data.get()))
|
||||
return replicateNumber<Float32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnFloat64 *>(data.get()))
|
||||
return replicateNumber<Float64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal32> *>(data.get()))
|
||||
return replicateNumber<Decimal32>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal64> *>(data.get()))
|
||||
return replicateNumber<Decimal64>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal128> *>(data.get()))
|
||||
return replicateNumber<Decimal128>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnDecimal<Decimal256> *>(data.get()))
|
||||
return replicateNumber<Decimal256>(replicate_offsets);
|
||||
if (typeid_cast<const ColumnString *>(data.get()))
|
||||
return replicateString(replicate_offsets);
|
||||
if (typeid_cast<const ColumnConst *>(data.get()))
|
||||
return replicateConst(replicate_offsets);
|
||||
if (typeid_cast<const ColumnNullable *>(data.get()))
|
||||
return replicateNullable(replicate_offsets);
|
||||
if (typeid_cast<const ColumnTuple *>(data.get()))
|
||||
return replicateTuple(replicate_offsets);
|
||||
return replicateGeneric(replicate_offsets);
|
||||
}
|
||||
|
||||
@ -953,6 +1015,8 @@ ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
|
||||
template <typename T>
|
||||
ColumnPtr ColumnArray::replicateNumber(const Offsets & replicate_offsets) const
|
||||
{
|
||||
using ColVecType = ColumnVectorOrDecimal<T>;
|
||||
|
||||
size_t col_size = size();
|
||||
if (col_size != replicate_offsets.size())
|
||||
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of offsets doesn't match size of column.");
|
||||
@ -964,10 +1028,10 @@ ColumnPtr ColumnArray::replicateNumber(const Offsets & replicate_offsets) const
|
||||
|
||||
ColumnArray & res_arr = typeid_cast<ColumnArray &>(*res);
|
||||
|
||||
const typename ColumnVector<T>::Container & src_data = typeid_cast<const ColumnVector<T> &>(*data).getData();
|
||||
const typename ColVecType::Container & src_data = typeid_cast<const ColVecType &>(*data).getData();
|
||||
const Offsets & src_offsets = getOffsets();
|
||||
|
||||
typename ColumnVector<T>::Container & res_data = typeid_cast<ColumnVector<T> &>(res_arr.getData()).getData();
|
||||
typename ColVecType::Container & res_data = typeid_cast<ColVecType &>(res_arr.getData()).getData();
|
||||
Offsets & res_offsets = res_arr.getOffsets();
|
||||
|
||||
res_data.reserve(data->size() / col_size * replicate_offsets.back());
|
||||
|
@ -59,10 +59,7 @@ public:
|
||||
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
|
||||
void insertData(const char * src, size_t /*length*/) override;
|
||||
void insertDefault() override { data.push_back(T()); }
|
||||
virtual void insertManyDefaults(size_t length) override
|
||||
{
|
||||
data.resize_fill(data.size() + length);
|
||||
}
|
||||
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
|
||||
void insert(const Field & x) override { data.push_back(x.get<T>()); }
|
||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||
|
||||
|
@ -320,12 +320,20 @@ INSTANTIATE(UInt8)
|
||||
INSTANTIATE(UInt16)
|
||||
INSTANTIATE(UInt32)
|
||||
INSTANTIATE(UInt64)
|
||||
INSTANTIATE(UInt128)
|
||||
INSTANTIATE(UInt256)
|
||||
INSTANTIATE(Int8)
|
||||
INSTANTIATE(Int16)
|
||||
INSTANTIATE(Int32)
|
||||
INSTANTIATE(Int64)
|
||||
INSTANTIATE(Int128)
|
||||
INSTANTIATE(Int256)
|
||||
INSTANTIATE(Float32)
|
||||
INSTANTIATE(Float64)
|
||||
INSTANTIATE(Decimal32)
|
||||
INSTANTIATE(Decimal64)
|
||||
INSTANTIATE(Decimal128)
|
||||
INSTANTIATE(Decimal256)
|
||||
|
||||
#undef INSTANTIATE
|
||||
|
||||
|
@ -82,7 +82,12 @@ FilterDescription::FilterDescription(const IColumn & column_)
|
||||
const auto size = res.size();
|
||||
assert(size == null_map.size());
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
res[i] = res[i] && !null_map[i];
|
||||
{
|
||||
auto has_val = static_cast<UInt8>(!!res[i]);
|
||||
auto not_null = static_cast<UInt8>(!null_map[i]);
|
||||
/// Instead of the logical AND operator(&&), the bitwise one(&) is utilized for the auto vectorization.
|
||||
res[i] = has_val & not_null;
|
||||
}
|
||||
|
||||
data = &res;
|
||||
data_holder = std::move(mutable_holder);
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <base/defines.h>
|
||||
|
||||
@ -32,26 +32,25 @@ public:
|
||||
MultiVersion() = default;
|
||||
|
||||
explicit MultiVersion(std::unique_ptr<const T> && value)
|
||||
: current_version(std::move(value))
|
||||
{
|
||||
set(std::move(value));
|
||||
}
|
||||
|
||||
/// Obtain current version for read-only usage. Returns shared_ptr, that manages lifetime of version.
|
||||
Version get() const
|
||||
{
|
||||
/// NOTE: is it possible to lock-free replace of shared_ptr?
|
||||
std::lock_guard lock(mutex);
|
||||
return current_version;
|
||||
return std::atomic_load(&current_version);
|
||||
}
|
||||
|
||||
/// TODO: replace atomic_load/store() on shared_ptr (which is deprecated as of C++20) by C++20 std::atomic<std::shared_ptr>.
|
||||
/// Clang 15 currently does not support it.
|
||||
|
||||
/// Update an object with new version.
|
||||
void set(std::unique_ptr<const T> && value)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
current_version = std::move(value);
|
||||
std::atomic_store(&current_version, Version{std::move(value)});
|
||||
}
|
||||
|
||||
private:
|
||||
Version current_version TSA_GUARDED_BY(mutex);
|
||||
mutable std::mutex mutex;
|
||||
Version current_version;
|
||||
};
|
||||
|
@ -29,6 +29,7 @@
|
||||
#cmakedefine01 USE_FASTOPS
|
||||
#cmakedefine01 USE_NLP
|
||||
#cmakedefine01 USE_VECTORSCAN
|
||||
#cmakedefine01 USE_LIBURING
|
||||
#cmakedefine01 USE_AVRO
|
||||
#cmakedefine01 USE_CAPNP
|
||||
#cmakedefine01 USE_PARQUET
|
||||
|
@ -11,10 +11,14 @@
|
||||
|
||||
// This depends on BoringSSL-specific API, notably <openssl/aead.h>.
|
||||
#if USE_SSL
|
||||
#include <openssl/digest.h>
|
||||
#include <openssl/err.h>
|
||||
#include <boost/algorithm/hex.hpp>
|
||||
#include <openssl/aead.h>
|
||||
# include <openssl/err.h>
|
||||
# include <boost/algorithm/hex.hpp>
|
||||
# if USE_BORINGSSL
|
||||
# include <openssl/digest.h>
|
||||
# include <openssl/aead.h>
|
||||
# else
|
||||
# include <openssl/evp.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Common part for both parts (with SSL and without)
|
||||
@ -87,23 +91,6 @@ constexpr size_t nonce_max_size = 13; /// Nonce size and one byte to show i
|
||||
constexpr size_t actual_nonce_size = 12; /// Nonce actual size
|
||||
const String empty_nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", actual_nonce_size};
|
||||
|
||||
/// Get encryption/decryption algorithms.
|
||||
auto getMethod(EncryptionMethod Method)
|
||||
{
|
||||
if (Method == AES_128_GCM_SIV)
|
||||
{
|
||||
return EVP_aead_aes_128_gcm_siv;
|
||||
}
|
||||
else if (Method == AES_256_GCM_SIV)
|
||||
{
|
||||
return EVP_aead_aes_256_gcm_siv;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
|
||||
}
|
||||
}
|
||||
|
||||
/// Find out key size for each algorithm
|
||||
UInt64 methodKeySize(EncryptionMethod Method)
|
||||
{
|
||||
@ -128,6 +115,24 @@ std::string lastErrorString()
|
||||
return std::string(buffer.data());
|
||||
}
|
||||
|
||||
#if USE_BORINGSSL
|
||||
/// Get encryption/decryption algorithms.
|
||||
/// Maps an encryption method to the matching `EVP_aead_*` function.
/// The function itself is returned, not the result of calling it.
/// Throws BAD_ARGUMENTS for any method other than AES_128_GCM_SIV / AES_256_GCM_SIV.
auto getMethod(EncryptionMethod Method)
|
||||
{
|
||||
if (Method == AES_128_GCM_SIV)
|
||||
{
|
||||
return EVP_aead_aes_128_gcm_siv;
|
||||
}
|
||||
else if (Method == AES_256_GCM_SIV)
|
||||
{
|
||||
return EVP_aead_aes_256_gcm_siv;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
|
||||
}
|
||||
}
|
||||
|
||||
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
|
||||
/// This function takes the key and nonce and uses them to encrypt the text.
|
||||
/// If something went wrong (can't init context or can't encrypt data) it throws exception.
|
||||
@ -186,6 +191,160 @@ size_t decrypt(std::string_view ciphertext, char * plaintext, EncryptionMethod m
|
||||
|
||||
return out_len;
|
||||
}
|
||||
#else
|
||||
/// Get encryption/decryption algorithms.
|
||||
/// Maps an encryption method to the matching `EVP_aes_*_gcm` function.
/// The function itself is returned, not the result of calling it.
/// Throws BAD_ARGUMENTS for any method other than AES_128_GCM_SIV / AES_256_GCM_SIV.
/// NOTE(review): the *_GCM_SIV methods are mapped to plain GCM ciphers here, not to SIV variants;
/// confirm this is intended for this (apparently non-BoringSSL) branch.
auto getMethod(EncryptionMethod Method)
|
||||
{
|
||||
if (Method == AES_128_GCM_SIV)
|
||||
{
|
||||
return EVP_aes_128_gcm;
|
||||
}
|
||||
else if (Method == AES_256_GCM_SIV)
|
||||
{
|
||||
return EVP_aes_256_gcm;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
|
||||
}
|
||||
}
|
||||
|
||||
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
|
||||
/// This function takes the key and nonce and uses them to encrypt the text.
|
||||
/// If something went wrong (can't init context or can't encrypt data) it throws exception.
|
||||
/// It returns length of encrypted text.
|
||||
/// Encrypts `plaintext` into `ciphertext_and_tag` using an EVP_CIPHER_CTX and stores the tag in the same buffer.
/// Returns the number of ciphertext bytes produced plus `tag_size`.
/// Throws OPENSSL_ERROR (with the text from lastErrorString()) if any of the EVP calls fails.
/// The context is freed both on success and when an exception is thrown inside the try block.
size_t encrypt(std::string_view plaintext, char * ciphertext_and_tag, EncryptionMethod method, const String & key, const String & nonce)
|
||||
{
|
||||
int out_len;
|
||||
int ciphertext_len;
|
||||
EVP_CIPHER_CTX *encrypt_ctx;
|
||||
|
||||
if (!(encrypt_ctx = EVP_CIPHER_CTX_new()))
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
try
|
||||
{
|
||||
/// First init call selects the cipher only; key and IV are passed as nullptr here.
const int ok_cryptinit = EVP_EncryptInit_ex(encrypt_ctx,
|
||||
getMethod(method)(),
|
||||
nullptr, nullptr, nullptr);
|
||||
if (!ok_cryptinit)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
/// The IV length is set to the nonce size before the key and nonce are supplied below.
const int ok_cipherctrl = EVP_CIPHER_CTX_ctrl(encrypt_ctx,
|
||||
EVP_CTRL_GCM_SET_IVLEN,
|
||||
static_cast<int32_t>(nonce.size()),
|
||||
nullptr);
|
||||
if (!ok_cipherctrl)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
/// Second init call passes the key and the nonce; the cipher argument is nullptr this time.
const int ok_nonceinit = EVP_EncryptInit_ex(encrypt_ctx, nullptr, nullptr,
|
||||
reinterpret_cast<const uint8_t*>(key.data()),
|
||||
reinterpret_cast<const uint8_t *>(nonce.data()));
|
||||
if (!ok_nonceinit)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
/// Encrypt the whole plaintext in one call, writing from the start of the output buffer.
const int ok_encryptupdate = EVP_EncryptUpdate(encrypt_ctx,
|
||||
reinterpret_cast<uint8_t *>(ciphertext_and_tag),
|
||||
&out_len,
|
||||
reinterpret_cast<const uint8_t *>(plaintext.data()),
|
||||
static_cast<int32_t>(plaintext.size()));
|
||||
ciphertext_len = out_len;
|
||||
if (!ok_encryptupdate)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
/// Finalize: any remaining output is written right after the bytes produced by the update call.
const int ok_encryptfinal = EVP_EncryptFinal_ex(encrypt_ctx,
|
||||
reinterpret_cast<uint8_t *>(ciphertext_and_tag) + out_len,
|
||||
reinterpret_cast<int32_t *>(&out_len));
|
||||
ciphertext_len += out_len;
|
||||
if (!ok_encryptfinal)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
/* Get the tag */
|
||||
/// NOTE(review): the tag is written at offset plaintext.size(), while the returned length is based on
/// ciphertext_len; this presumably relies on both being equal for GCM - confirm.
const int ok_tag = EVP_CIPHER_CTX_ctrl(encrypt_ctx,
|
||||
EVP_CTRL_GCM_GET_TAG,
|
||||
tag_size,
|
||||
reinterpret_cast<uint8_t *>(ciphertext_and_tag) + plaintext.size());
|
||||
|
||||
if (!ok_tag)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Release the context before propagating the error.
EVP_CIPHER_CTX_free(encrypt_ctx);
|
||||
throw;
|
||||
}
|
||||
EVP_CIPHER_CTX_free(encrypt_ctx);
|
||||
return ciphertext_len + tag_size;
|
||||
}
|
||||
|
||||
/// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag.
|
||||
/// This function get key and nonce and encrypt text with their help.
|
||||
/// If something went wrong (can't init context or can't encrypt data) it throws exception.
|
||||
/// It returns length of encrypted text.
|
||||
size_t decrypt(std::string_view ciphertext, char * plaintext, EncryptionMethod method, const String & key, const String & nonce)
|
||||
{
|
||||
|
||||
int out_len;
|
||||
int plaintext_len;
|
||||
EVP_CIPHER_CTX *decrypt_ctx;
|
||||
|
||||
if (!(decrypt_ctx = EVP_CIPHER_CTX_new()))
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
try
|
||||
{
|
||||
const int ok_cryptinit = EVP_DecryptInit_ex(decrypt_ctx,
|
||||
getMethod(method)(),
|
||||
nullptr, nullptr, nullptr);
|
||||
if (!ok_cryptinit)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
const int ok_cipherctrl = EVP_CIPHER_CTX_ctrl(decrypt_ctx,
|
||||
EVP_CTRL_GCM_SET_IVLEN,
|
||||
static_cast<int32_t>(nonce.size()), nullptr);
|
||||
if (!ok_cipherctrl)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
const int ok_nonceinit = EVP_DecryptInit_ex(decrypt_ctx, nullptr, nullptr,
|
||||
reinterpret_cast<const uint8_t*>(key.data()),
|
||||
reinterpret_cast<const uint8_t *>(nonce.data()));
|
||||
if (!ok_nonceinit)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
const int ok_decryptudpate = EVP_DecryptUpdate(decrypt_ctx,
|
||||
reinterpret_cast<uint8_t *>(plaintext),
|
||||
reinterpret_cast<int32_t *>(&out_len),
|
||||
reinterpret_cast<const uint8_t *>(ciphertext.data()),
|
||||
static_cast<int32_t>(ciphertext.size()) - tag_size);
|
||||
plaintext_len = out_len;
|
||||
|
||||
if (!ok_decryptudpate)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
const int ok_tag = EVP_CIPHER_CTX_ctrl(decrypt_ctx,
|
||||
EVP_CTRL_GCM_SET_TAG,
|
||||
tag_size,
|
||||
reinterpret_cast<uint8_t *>(const_cast<char *>(ciphertext.data())) + ciphertext.size() - tag_size);
|
||||
if (!ok_tag)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
|
||||
const int ok_decryptfinal = EVP_DecryptFinal_ex(decrypt_ctx,
|
||||
reinterpret_cast<uint8_t *>(plaintext) + out_len,
|
||||
reinterpret_cast<int32_t *>(&out_len));
|
||||
|
||||
if (!ok_decryptfinal)
|
||||
throw Exception::createDeprecated(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
EVP_CIPHER_CTX_free(decrypt_ctx);
|
||||
throw;
|
||||
}
|
||||
EVP_CIPHER_CTX_free(decrypt_ctx);
|
||||
|
||||
return plaintext_len + out_len;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Register codec in factory
|
||||
void registerEncryptionCodec(CompressionCodecFactory & factory, EncryptionMethod Method)
|
||||
|
@ -178,9 +178,7 @@ void registerCodecDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecT64(CompressionCodecFactory & factory);
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
#if USE_BORINGSSL
|
||||
void registerCodecEncrypted(CompressionCodecFactory & factory);
|
||||
#endif
|
||||
void registerCodecFPC(CompressionCodecFactory & factory);
|
||||
|
||||
#endif
|
||||
@ -197,9 +195,7 @@ CompressionCodecFactory::CompressionCodecFactory()
|
||||
registerCodecT64(*this);
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
#if USE_BORINGSSL
|
||||
registerCodecEncrypted(*this);
|
||||
#endif
|
||||
registerCodecFPC(*this);
|
||||
#ifdef ENABLE_QPL_COMPRESSION
|
||||
registerCodecDeflateQpl(*this);
|
||||
|
@ -5,10 +5,17 @@
|
||||
#include <base/range.h>
|
||||
#include <boost/blank.hpp>
|
||||
#include <unordered_map>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
|
||||
|
||||
namespace boost::program_options
|
||||
{
|
||||
class options_description;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
|
||||
@ -19,7 +26,6 @@ enum class SettingsWriteFormat
|
||||
DEFAULT = STRINGS_WITH_FLAGS,
|
||||
};
|
||||
|
||||
|
||||
/** Template class to define collections of settings.
|
||||
* Example of usage:
|
||||
*
|
||||
@ -119,6 +125,18 @@ public:
|
||||
std::conditional_t<Traits::allow_custom_settings, const CustomSettingMap::mapped_type*, boost::blank> custom_setting;
|
||||
};
|
||||
|
||||
/// Adds program options to set the settings from a command line.
|
||||
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
|
||||
void addProgramOptions(boost::program_options::options_description & options);
|
||||
|
||||
/// Adds program options to set the settings from a command line.
|
||||
/// Allows to set one setting multiple times, the last value will be used.
|
||||
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
|
||||
void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
|
||||
|
||||
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
|
||||
void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field);
|
||||
|
||||
enum SkipFlags
|
||||
{
|
||||
SKIP_NONE = 0,
|
||||
@ -518,6 +536,38 @@ String BaseSettings<TTraits>::toString() const
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
void BaseSettings<TTraits>::addProgramOptions(boost::program_options::options_description & options)
|
||||
{
|
||||
for (const auto & field : all())
|
||||
addProgramOption(options, field);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
void BaseSettings<TTraits>::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
|
||||
{
|
||||
for (const auto & field : all())
|
||||
addProgramOptionAsMultitoken(options, field);
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
void BaseSettings<TTraits>::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
|
||||
{
|
||||
const std::string_view name = field.getName();
|
||||
auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
|
||||
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
|
||||
name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
void BaseSettings<TTraits>::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field)
|
||||
{
|
||||
const std::string_view name = field.getName();
|
||||
auto on_program_option = boost::function1<void, const Strings &>([this, name](const Strings & values) { set(name, values.back()); });
|
||||
options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
|
||||
name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
|
||||
}
|
||||
|
||||
template <typename TTraits>
|
||||
bool operator==(const BaseSettings<TTraits> & left, const BaseSettings<TTraits> & right)
|
||||
{
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <cstring>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -82,38 +81,6 @@ void Settings::dumpToMapColumn(IColumn * column, bool changed_only)
|
||||
offsets.push_back(offsets.back() + size);
|
||||
}
|
||||
|
||||
/// Registers one single-value command-line option for every setting returned by all().
void Settings::addProgramOptions(boost::program_options::options_description & options)
{
    for (const auto & setting : all())
        addProgramOption(options, setting);
}
|
||||
|
||||
/// Registers one multitoken command-line option for every setting returned by all().
void Settings::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
{
    for (const auto & setting : all())
        addProgramOptionAsMultitoken(options, setting);
}
|
||||
|
||||
/// Registers a command-line option named after the given setting.
/// When the option is notified, its string value is applied to this object through set().
void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field)
{
    namespace po = boost::program_options;

    const std::string_view name = field.getName();

    /// The callback captures `this` and the name view, so both have to outlive the option.
    boost::function1<void, const std::string &> apply_value([this, name](const std::string & value) { set(name, value); });

    /// NOTE(review): name.data() is used as a C string, which presumably relies on the
    /// underlying setting name being null-terminated - confirm against getName().
    auto * semantic = po::value<std::string>()->composing()->notifier(apply_value);
    options.add(boost::shared_ptr<po::option_description>(new po::option_description(name.data(), semantic, field.getDescription())));
}
|
||||
|
||||
/// Registers a multitoken command-line option named after the given setting.
/// When the option is notified, the last of the collected values is applied through set().
void Settings::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field)
{
    namespace po = boost::program_options;

    const std::string_view name = field.getName();

    /// The callback captures `this` and the name view, so both have to outlive the option.
    boost::function1<void, const Strings &> apply_last_value([this, name](const Strings & values) { set(name, values.back()); });

    /// NOTE(review): name.data() is used as a C string, which presumably relies on the
    /// underlying setting name being null-terminated - confirm against getName().
    auto * semantic = po::value<Strings>()->multitoken()->composing()->notifier(apply_last_value);
    options.add(boost::shared_ptr<po::option_description>(new po::option_description(name.data(), semantic, field.getDescription())));
}
|
||||
|
||||
void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path)
|
||||
{
|
||||
if (config.getBool("skip_check_for_incorrect_settings", false))
|
||||
|
@ -13,12 +13,6 @@ namespace Poco::Util
|
||||
class AbstractConfiguration;
|
||||
}
|
||||
|
||||
namespace boost::program_options
|
||||
{
|
||||
class options_description;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class IColumn;
|
||||
@ -96,6 +90,7 @@ class IColumn;
|
||||
M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \
|
||||
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
|
||||
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
|
||||
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
|
||||
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
|
||||
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
|
||||
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
|
||||
@ -894,6 +889,7 @@ class IColumn;
|
||||
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
|
||||
\
|
||||
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
|
||||
M(Bool, regexp_dict_allow_hyperscan, false, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
|
||||
|
||||
// End of FORMAT_FACTORY_SETTINGS
|
||||
// Please add settings non-related to formats into the COMMON_SETTINGS above.
|
||||
@ -926,25 +922,12 @@ struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings
|
||||
/// Dumps profile events to column of type Map(String, String)
|
||||
void dumpToMapColumn(IColumn * column, bool changed_only = true);
|
||||
|
||||
/// Adds program options to set the settings from a command line.
|
||||
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
|
||||
void addProgramOptions(boost::program_options::options_description & options);
|
||||
|
||||
/// Adds program options as to set the settings from a command line.
|
||||
/// Allows to set one setting multiple times, the last value will be used.
|
||||
/// (Don't forget to call notify() on the `variables_map` after parsing it!)
|
||||
void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
|
||||
|
||||
/// Check that there is no user-level settings at the top level in config.
|
||||
/// This is a common source of mistake (user don't know where to write user-level setting).
|
||||
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
|
||||
|
||||
std::vector<String> getAllRegisteredNames() const override;
|
||||
|
||||
void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field);
|
||||
|
||||
void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field);
|
||||
|
||||
void set(std::string_view name, const Field & value) override;
|
||||
|
||||
void setDefaultValue(const String & name) { resetToDefault(name); }
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <exception>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
|
||||
@ -88,8 +89,15 @@ struct RegExpTreeDictionary::RegexTreeNode
|
||||
UInt64 parent_id;
|
||||
std::string regex;
|
||||
re2_st::RE2 searcher;
|
||||
|
||||
/// Stores the node ids and the pattern text, and constructs the re2 searcher
/// from the same pattern with the given options.
RegexTreeNode(UInt64 id_, UInt64 parent_id_, const String & regex_, const re2_st::RE2::Options & regexp_options)
    : id(id_)
    , parent_id(parent_id_)
    , regex(regex_)
    , searcher(regex_, regexp_options)
{
}
|
||||
|
||||
bool match(const char * haystack, size_t size) const
|
||||
{
|
||||
return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
|
||||
struct AttributeValue
|
||||
{
|
||||
Field field;
|
||||
@ -118,7 +126,7 @@ std::vector<StringPiece> createStringPieces(const String & value, int num_captur
|
||||
}
|
||||
int ref_num = value[i+1]-'0';
|
||||
if (ref_num >= num_captures)
|
||||
LOG_DEBUG(logger,
|
||||
LOG_TRACE(logger,
|
||||
"Reference Id {} in set string is invalid, the regexp {} only has {} capturing groups",
|
||||
ref_num, regex, num_captures-1);
|
||||
result.push_back(StringPiece(ref_num));
|
||||
@ -137,13 +145,60 @@ std::vector<StringPiece> createStringPieces(const String & value, int num_captur
|
||||
|
||||
void RegExpTreeDictionary::calculateBytesAllocated()
|
||||
{
|
||||
for (const String & regex : regexps)
|
||||
for (const String & regex : simple_regexps)
|
||||
bytes_allocated += regex.size();
|
||||
bytes_allocated += sizeof(UInt64) * regexp_ids.size();
|
||||
bytes_allocated += (sizeof(RegexTreeNode) + sizeof(UInt64)) * regex_nodes.size();
|
||||
bytes_allocated += 2 * sizeof(UInt64) * topology_order.size();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// hyper scan is not good at processing regex containing {0, 200}
|
||||
/// This will make re compilation slow and failed. So we select this heavy regular expressions and
|
||||
/// process it with re2.
|
||||
struct RegexChecker
|
||||
{
|
||||
re2_st::RE2 searcher;
|
||||
RegexChecker() : searcher(R"(\{([\d]+),([\d]+)\})") {}
|
||||
|
||||
static bool isFigureLargerThanFifty(const String & str)
|
||||
try
|
||||
{
|
||||
auto number = std::stoi(str);
|
||||
return number > 50;
|
||||
}
|
||||
catch (std::exception &)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
bool isSimpleRegex(const String & regex) const
|
||||
{
|
||||
|
||||
re2_st::StringPiece haystack(regex.data(), regex.size());
|
||||
re2_st::StringPiece matches[10];
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < regex.size())
|
||||
{
|
||||
if (searcher.Match(haystack, start_pos, regex.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
start_pos += match.length();
|
||||
const auto & match1 = matches[1];
|
||||
const auto & match2 = matches[2];
|
||||
if (isFigureLargerThanFifty(match1.ToString()) || isFigureLargerThanFifty(match2.ToString()))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void RegExpTreeDictionary::initRegexNodes(Block & block)
|
||||
{
|
||||
auto id_column = block.getByName(kId).column;
|
||||
@ -152,6 +207,8 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
|
||||
auto keys_column = block.getByName(kKeys).column;
|
||||
auto values_column = block.getByName(kValues).column;
|
||||
|
||||
RegexChecker checker;
|
||||
|
||||
size_t size = block.rows();
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
@ -165,12 +222,10 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
|
||||
if (id == 0)
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "There are invalid id {}", id);
|
||||
|
||||
regexps.push_back(regex);
|
||||
regexp_ids.push_back(id);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
regexp_options.set_log_errors(false);
|
||||
RegexTreeNodePtr node = std::make_unique<RegexTreeNode>(id, parent_id, regex, regexp_options);
|
||||
RegexTreeNodePtr node = std::make_shared<RegexTreeNode>(id, parent_id, regex, regexp_options);
|
||||
|
||||
int num_captures = std::min(node->searcher.NumberOfCapturingGroups() + 1, 10);
|
||||
|
||||
@ -196,7 +251,16 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
|
||||
}
|
||||
}
|
||||
}
|
||||
regex_nodes.emplace(id, std::move(node));
|
||||
regex_nodes.emplace(id, node);
|
||||
#if USE_VECTORSCAN
|
||||
if (use_vectorscan && checker.isSimpleRegex(regex))
|
||||
{
|
||||
simple_regexps.push_back(regex);
|
||||
regexp_ids.push_back(id);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
complex_regexp_nodes.push_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
@ -226,7 +290,7 @@ void RegExpTreeDictionary::initTopologyOrder(UInt64 node_idx, std::set<UInt64> &
|
||||
visited.insert(node_idx);
|
||||
for (UInt64 child_idx : regex_nodes[node_idx]->children)
|
||||
if (visited.contains(child_idx))
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Invalid Regex tree");
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Invalid Regex tree. The input tree is cyclical");
|
||||
else
|
||||
initTopologyOrder(child_idx, visited, topology_id);
|
||||
topology_order[node_idx] = topology_id++;
|
||||
@ -245,12 +309,18 @@ void RegExpTreeDictionary::loadData()
|
||||
initRegexNodes(block);
|
||||
}
|
||||
initGraph();
|
||||
if (regexps.empty())
|
||||
if (simple_regexps.empty() && complex_regexp_nodes.empty())
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "There are no available regular expression. Please check your config");
|
||||
LOG_INFO(logger, "There are {} simple regexps and {} complex regexps", simple_regexps.size(), complex_regexp_nodes.size());
|
||||
/// If all the regexps cannot work with hyperscan, we should set this flag off to avoid exceptions.
|
||||
if (simple_regexps.empty())
|
||||
use_vectorscan = false;
|
||||
if (!use_vectorscan)
|
||||
return;
|
||||
#if USE_VECTORSCAN
|
||||
try
|
||||
{
|
||||
std::vector<std::string_view> regexps_views(regexps.begin(), regexps.end());
|
||||
std::vector<std::string_view> regexps_views(simple_regexps.begin(), simple_regexps.end());
|
||||
hyperscan_regex = MultiRegexps::getOrSet<true, false>(regexps_views, std::nullopt);
|
||||
hyperscan_regex->get();
|
||||
}
|
||||
@ -258,7 +328,6 @@ void RegExpTreeDictionary::loadData()
|
||||
{
|
||||
/// Some compile errors will be thrown as LOGICAL ERROR and cause crash, e.g. empty expression or expressions are too large.
|
||||
/// We catch the error here and rethrow again.
|
||||
/// TODO: fallback to other engine, like re2, when exceptions occur.
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "Error occurs when compiling regular expressions, reason: {}", e.message());
|
||||
}
|
||||
#endif
|
||||
@ -270,8 +339,17 @@ void RegExpTreeDictionary::loadData()
|
||||
}
|
||||
|
||||
RegExpTreeDictionary::RegExpTreeDictionary(
|
||||
const StorageID & id_, const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_)
|
||||
: IDictionary(id_), structure(structure_), source_ptr(source_ptr_), configuration(configuration_), logger(&Poco::Logger::get("RegExpTreeDictionary"))
|
||||
const StorageID & id_,
|
||||
const DictionaryStructure & structure_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
Configuration configuration_,
|
||||
bool use_vectorscan_)
|
||||
: IDictionary(id_),
|
||||
structure(structure_),
|
||||
source_ptr(source_ptr_),
|
||||
configuration(configuration_),
|
||||
use_vectorscan(use_vectorscan_),
|
||||
logger(&Poco::Logger::get("RegExpTreeDictionary"))
|
||||
{
|
||||
if (auto * ch_source = typeid_cast<ClickHouseDictionarySource *>(source_ptr.get()))
|
||||
{
|
||||
@ -289,12 +367,15 @@ RegExpTreeDictionary::RegExpTreeDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
String processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
|
||||
std::pair<String, bool> processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
|
||||
{
|
||||
re2_st::StringPiece haystack(data.data(), data.size());
|
||||
re2_st::StringPiece matches[10];
|
||||
String result;
|
||||
searcher.Match(haystack, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10);
|
||||
/// if the pattern is a single '$1' but fails to match, we would use the default value.
|
||||
if (pieces.size() == 1 && pieces[0].ref_num >= 0 && pieces[0].ref_num < 10 && matches[pieces[0].ref_num].empty())
|
||||
return std::make_pair(result, true);
|
||||
for (const auto & item : pieces)
|
||||
{
|
||||
if (item.ref_num >= 0 && item.ref_num < 10)
|
||||
@ -302,7 +383,7 @@ String processBackRefs(const String & data, const re2_st::RE2 & searcher, const
|
||||
else
|
||||
result += item.literal;
|
||||
}
|
||||
return result;
|
||||
return {result, false};
|
||||
}
|
||||
|
||||
// walk towards root and collect attributes.
|
||||
@ -312,7 +393,9 @@ bool RegExpTreeDictionary::setAttributes(
|
||||
std::unordered_map<String, Field> & attributes_to_set,
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes) const
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
const std::unordered_map<String, ColumnPtr> & defaults,
|
||||
size_t key_index) const
|
||||
{
|
||||
|
||||
if (visited_nodes.contains(id))
|
||||
@ -325,8 +408,14 @@ bool RegExpTreeDictionary::setAttributes(
|
||||
continue;
|
||||
if (value.containsBackRefs())
|
||||
{
|
||||
String updated_str = processBackRefs(data, regex_nodes.at(id)->searcher, value.pieces);
|
||||
attributes_to_set[name] = parseStringToField(updated_str, attributes.at(name).type);
|
||||
auto [updated_str, use_default] = processBackRefs(data, regex_nodes.at(id)->searcher, value.pieces);
|
||||
if (use_default)
|
||||
{
|
||||
DefaultValueProvider default_value(attributes.at(name).null_value, defaults.at(name));
|
||||
attributes_to_set[name] = default_value.getDefaultValue(key_index);
|
||||
}
|
||||
else
|
||||
attributes_to_set[name] = parseStringToField(updated_str, attributes.at(name).type);
|
||||
}
|
||||
else
|
||||
attributes_to_set[name] = value.field;
|
||||
@ -334,18 +423,17 @@ bool RegExpTreeDictionary::setAttributes(
|
||||
|
||||
auto parent_id = regex_nodes.at(id)->parent_id;
|
||||
if (parent_id > 0)
|
||||
setAttributes(parent_id, attributes_to_set, data, visited_nodes, attributes);
|
||||
setAttributes(parent_id, attributes_to_set, data, visited_nodes, attributes, defaults, key_index);
|
||||
|
||||
// if all the attributes have set, the walking through can be stopped.
|
||||
/// If all the attributes have been set, the walk can be stopped.
|
||||
return attributes_to_set.size() == attributes.size();
|
||||
}
|
||||
|
||||
#if USE_VECTORSCAN
|
||||
namespace
|
||||
{
|
||||
struct MatchContext
|
||||
{
|
||||
std::unordered_set<UInt64> matched_idx_set;
|
||||
std::set<UInt64> matched_idx_set;
|
||||
std::vector<std::pair<UInt64, UInt64>> matched_idx_sorted_list;
|
||||
|
||||
const std::vector<UInt64> & regexp_ids ;
|
||||
@ -354,14 +442,23 @@ namespace
|
||||
MatchContext(const std::vector<UInt64> & regexp_ids_, const std::unordered_map<UInt64, UInt64> & topology_order_)
|
||||
: regexp_ids(regexp_ids_), topology_order(topology_order_) {}
|
||||
|
||||
void insert(unsigned int id)
|
||||
[[maybe_unused]]
|
||||
void insertIdx(unsigned int idx)
|
||||
{
|
||||
UInt64 idx = regexp_ids[id-1];
|
||||
UInt64 topological_order = topology_order.at(idx);
|
||||
matched_idx_set.emplace(idx);
|
||||
matched_idx_sorted_list.push_back(std::make_pair(topological_order, idx));
|
||||
UInt64 node_id = regexp_ids[idx-1];
|
||||
UInt64 topological_order = topology_order.at(node_id);
|
||||
matched_idx_set.emplace(node_id);
|
||||
matched_idx_sorted_list.push_back(std::make_pair(topological_order, node_id));
|
||||
}
|
||||
|
||||
/// Records a matched node given directly by its node id, together with its topological order.
/// Throws (from at()) if the id has no entry in topology_order; nothing is recorded in that case.
void insertNodeID(UInt64 id)
{
    /// Look the order up first so that a missing id leaves both containers untouched.
    const UInt64 order = topology_order.at(id);
    matched_idx_set.emplace(id);
    matched_idx_sorted_list.emplace_back(order, id);
}
|
||||
|
||||
/// Sort by topological order, which indicates the matching priorities.
|
||||
void sort()
|
||||
{
|
||||
std::sort(matched_idx_sorted_list.begin(), matched_idx_sorted_list.end());
|
||||
@ -373,24 +470,28 @@ namespace
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif // USE_VECTORSCAN
|
||||
|
||||
std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndices(
|
||||
[[maybe_unused]] const ColumnString::Chars & keys_data,
|
||||
[[maybe_unused]] const ColumnString::Offsets & keys_offsets,
|
||||
[[maybe_unused]] const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
[[maybe_unused]] const std::unordered_map<String, ColumnPtr> & defaults) const
|
||||
std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::match(
|
||||
const ColumnString::Chars & keys_data,
|
||||
const ColumnString::Offsets & keys_offsets,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
const std::unordered_map<String, ColumnPtr> & defaults) const
|
||||
{
|
||||
|
||||
#if USE_VECTORSCAN
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->get()->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
if (use_vectorscan)
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not clone scratch space for hyperscan");
|
||||
hs_error_t err = hs_clone_scratch(hyperscan_regex->get()->getScratch(), &scratch);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not clone scratch space for hyperscan");
|
||||
}
|
||||
}
|
||||
|
||||
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
||||
#endif
|
||||
|
||||
std::unordered_map<String, MutableColumnPtr> columns;
|
||||
|
||||
@ -402,16 +503,6 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
|
||||
columns[name] = std::move(col_ptr);
|
||||
}
|
||||
|
||||
auto on_match = [](unsigned int id,
|
||||
unsigned long long /* from */, // NOLINT
|
||||
unsigned long long /* to */, // NOLINT
|
||||
unsigned int /* flags */,
|
||||
void * context) -> int
|
||||
{
|
||||
static_cast<MatchContext *>(context)->insert(id);
|
||||
return 0;
|
||||
};
|
||||
|
||||
UInt64 offset = 0;
|
||||
for (size_t key_idx = 0; key_idx < keys_offsets.size(); ++key_idx)
|
||||
{
|
||||
@ -420,25 +511,46 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
|
||||
|
||||
MatchContext match_result(regexp_ids, topology_order);
|
||||
|
||||
err = hs_scan(
|
||||
hyperscan_regex->get()->getDB(),
|
||||
reinterpret_cast<const char *>(keys_data.data()) + offset,
|
||||
static_cast<unsigned>(length),
|
||||
0,
|
||||
smart_scratch.get(),
|
||||
on_match,
|
||||
&match_result);
|
||||
#if USE_VECTORSCAN
|
||||
if (use_vectorscan)
|
||||
{
|
||||
auto on_match = [](unsigned int id,
|
||||
unsigned long long /* from */, // NOLINT
|
||||
unsigned long long /* to */, // NOLINT
|
||||
unsigned int /* flags */,
|
||||
void * context) -> int
|
||||
{
|
||||
static_cast<MatchContext *>(context)->insertIdx(id);
|
||||
return 0;
|
||||
};
|
||||
hs_error_t err = hs_scan(
|
||||
hyperscan_regex->get()->getDB(),
|
||||
reinterpret_cast<const char *>(keys_data.data()) + offset,
|
||||
static_cast<unsigned>(length),
|
||||
0,
|
||||
smart_scratch.get(),
|
||||
on_match,
|
||||
&match_result);
|
||||
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Failed to scan data with vectorscan");
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception(ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT, "Failed to scan data with vectorscan");
|
||||
}
|
||||
#endif
|
||||
|
||||
for (const auto & node_ptr : complex_regexp_nodes)
|
||||
{
|
||||
if (node_ptr->match(reinterpret_cast<const char *>(keys_data.data()) + offset, length))
|
||||
{
|
||||
match_result.insertNodeID(node_ptr->id);
|
||||
}
|
||||
}
|
||||
|
||||
match_result.sort();
|
||||
|
||||
// Walk through the regex tree util all attributes are set;
|
||||
/// Walk through the regex tree until all attributes are set.
|
||||
std::unordered_map<String, Field> attributes_to_set;
|
||||
std::unordered_set<UInt64> visited_nodes;
|
||||
|
||||
// check if it is a valid id
|
||||
/// Some node matches but its parents cannot match. In this case we must regard this node unmatched.
|
||||
auto is_invalid = [&](UInt64 id)
|
||||
{
|
||||
while (id)
|
||||
@ -459,7 +571,7 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
|
||||
continue;
|
||||
if (visited_nodes.contains(id))
|
||||
continue;
|
||||
if (setAttributes(id, attributes_to_set, str, visited_nodes, attributes))
|
||||
if (setAttributes(id, attributes_to_set, str, visited_nodes, attributes, defaults, key_idx))
|
||||
break;
|
||||
}
|
||||
|
||||
@ -468,12 +580,11 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
|
||||
if (attributes_to_set.contains(name))
|
||||
continue;
|
||||
|
||||
/// TODO: default value might be a back-reference, that is useful in lib ua-core
|
||||
DefaultValueProvider default_value(attr.null_value, defaults.at(name));
|
||||
columns[name]->insert(default_value.getDefaultValue(key_idx));
|
||||
}
|
||||
|
||||
// insert to columns
|
||||
/// insert to columns
|
||||
for (const auto & [name, value] : attributes_to_set)
|
||||
columns[name]->insert(value);
|
||||
|
||||
@ -485,9 +596,6 @@ std::unordered_map<String, ColumnPtr> RegExpTreeDictionary::matchSearchAllIndice
|
||||
result.emplace(name, std::move(mutable_ptr));
|
||||
|
||||
return result;
|
||||
#else
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Multi search all indices is not implemented when USE_VECTORSCAN is off");
|
||||
#endif // USE_VECTORSCAN
|
||||
}
|
||||
|
||||
Columns RegExpTreeDictionary::getColumns(
|
||||
@ -516,7 +624,7 @@ Columns RegExpTreeDictionary::getColumns(
|
||||
|
||||
/// calculate matches
|
||||
const ColumnString * key_column = typeid_cast<const ColumnString *>(key_columns[0].get());
|
||||
const auto & columns_map = matchSearchAllIndices(
|
||||
const auto & columns_map = match(
|
||||
key_column->getChars(),
|
||||
key_column->getOffsets(),
|
||||
attributes,
|
||||
@ -561,7 +669,7 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory)
|
||||
"regexp_tree dictionary doesn't accept sources other than yaml source. "
|
||||
"To active it, please set regexp_dict_allow_other_sources=true");
|
||||
|
||||
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
||||
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan);
|
||||
};
|
||||
|
||||
factory.registerLayout("regexp_tree", create_layout, true);
|
||||
|
@ -43,7 +43,11 @@ public:
|
||||
const std::string name = "RegExpTree";
|
||||
|
||||
RegExpTreeDictionary(
|
||||
const StorageID & id_, const DictionaryStructure & structure_, DictionarySourcePtr source_ptr_, Configuration configuration_);
|
||||
const StorageID & id_,
|
||||
const DictionaryStructure & structure_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
Configuration configuration_,
|
||||
bool use_vectorscan_);
|
||||
|
||||
std::string getTypeName() const override { return name; }
|
||||
|
||||
@ -79,7 +83,7 @@ public:
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
return std::make_shared<RegExpTreeDictionary>(getDictionaryID(), structure, source_ptr->clone(), configuration);
|
||||
return std::make_shared<RegExpTreeDictionary>(getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan);
|
||||
}
|
||||
|
||||
ColumnUInt8::Ptr hasKeys(const Columns &, const DataTypes &) const override
|
||||
@ -122,11 +126,6 @@ private:
|
||||
mutable std::atomic<size_t> query_count{0};
|
||||
mutable std::atomic<size_t> found_count{0};
|
||||
|
||||
std::vector<std::string> regexps;
|
||||
std::vector<UInt64> regexp_ids;
|
||||
|
||||
Poco::Logger * logger;
|
||||
|
||||
void calculateBytesAllocated();
|
||||
|
||||
void loadData();
|
||||
@ -135,7 +134,7 @@ private:
|
||||
void initTopologyOrder(UInt64 node_idx, std::set<UInt64> & visited, UInt64 & topology_id);
|
||||
void initGraph();
|
||||
|
||||
std::unordered_map<String, ColumnPtr> matchSearchAllIndices(
|
||||
std::unordered_map<String, ColumnPtr> match(
|
||||
const ColumnString::Chars & keys_data,
|
||||
const ColumnString::Offsets & keys_offsets,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
@ -146,16 +145,26 @@ private:
|
||||
std::unordered_map<String, Field> & attributes_to_set,
|
||||
const String & data,
|
||||
std::unordered_set<UInt64> & visited_nodes,
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes) const;
|
||||
const std::unordered_map<String, const DictionaryAttribute &> & attributes,
|
||||
const std::unordered_map<String, ColumnPtr> & defaults,
|
||||
size_t key_index) const;
|
||||
|
||||
struct RegexTreeNode;
|
||||
using RegexTreeNodePtr = std::unique_ptr<RegexTreeNode>;
|
||||
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;
|
||||
|
||||
bool use_vectorscan;
|
||||
|
||||
std::vector<std::string> simple_regexps;
|
||||
std::vector<UInt64> regexp_ids;
|
||||
std::vector<RegexTreeNodePtr> complex_regexp_nodes;
|
||||
|
||||
std::map<UInt64, RegexTreeNodePtr> regex_nodes;
|
||||
std::unordered_map<UInt64, UInt64> topology_order;
|
||||
#if USE_VECTORSCAN
|
||||
MultiRegexps::DeferredConstructedRegexpsPtr hyperscan_regex;
|
||||
#endif
|
||||
|
||||
Poco::Logger * logger;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include "IOUringReader.h"
|
||||
|
||||
#if USE_LIBURING
|
||||
|
||||
#include <base/errnoToString.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Exception.h>
|
||||
|
@ -1,5 +1,8 @@
|
||||
#pragma once
|
||||
#if defined(OS_LINUX)
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_LIBURING
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <IO/AsynchronousReader.h>
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <Disks/IO/ThreadPoolReader.h>
|
||||
#include <IO/SynchronousReader.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -84,7 +84,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::io_uring)
|
||||
{
|
||||
#if defined(OS_LINUX)
|
||||
#if USE_LIBURING
|
||||
static std::shared_ptr<IOUringReader> reader = std::make_shared<IOUringReader>(512);
|
||||
if (!reader->isSupported())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system");
|
||||
|
41
src/Disks/tests/gtest_azure_sdk.cpp
Normal file
41
src/Disks/tests/gtest_azure_sdk.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <azure/storage/blobs.hpp>
|
||||
#include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
#include <azure/storage/blobs/blob_container_client.hpp>
|
||||
#include <azure/storage/blobs/blob_options.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
/// Hands one XmlReader over twice - first through the move constructor, then through
/// move-initialization - and reads from the final owner.
/// NOTE(review): presumably meant to be run under a sanitizer so that a leak or double free
/// in the moved-from readers is reported - confirm.
TEST(AzureXMLWrapper, TestLeak)
{
    using Azure::Storage::_internal::XmlReader;

    std::string xml = "<hello>world</hello>";

    XmlReader source(xml.c_str(), xml.length());
    XmlReader moved_by_constructor(std::move(source));
    XmlReader final_owner = std::move(moved_by_constructor);

    final_owner.Read();
}
|
||||
|
||||
/// Lists blobs through a container client that points at an endpoint named "unavailable"
/// and expects the call to fail with a TransportException.
/// NOTE(review): judging by the test name, the interesting part is that the failed request
/// must not leak - presumably checked by running the test under a sanitizer; confirm.
TEST(AzureBlobContainerClient, CurlMemoryLeak)
{
    namespace Blobs = Azure::Storage::Blobs;

    static constexpr auto unreachable_endpoint = "http://unavailable:19999/bucket";
    static constexpr auto container_name = "container";

    /// Retries are switched off, so the request is attempted only once.
    Blobs::BlobClientOptions client_options;
    client_options.Retry.MaxRetries = 0;

    auto container_client = std::make_unique<Blobs::BlobContainerClient>(
        Blobs::BlobContainerClient::CreateFromConnectionString(unreachable_endpoint, container_name, client_options));

    EXPECT_THROW({ container_client->ListBlobs(); }, Azure::Core::Http::TransportException);
}
|
||||
|
||||
#endif
|
@ -1,25 +0,0 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
|
||||
#include <azure/storage/blobs.hpp>
|
||||
#include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
|
||||
TEST(AzureXMLWrapper, TestLeak)
|
||||
{
|
||||
std::string str = "<hello>world</hello>";
|
||||
|
||||
Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length());
|
||||
Azure::Storage::_internal::XmlReader reader2(std::move(reader));
|
||||
Azure::Storage::_internal::XmlReader reader3 = std::move(reader2);
|
||||
reader3.Read();
|
||||
}
|
||||
|
||||
#endif
|
@ -982,7 +982,7 @@ struct JSONExtractTree
|
||||
return false;
|
||||
}
|
||||
|
||||
assert_cast<ColumnDecimal<DecimalType> &>(dest).insert(value);
|
||||
assert_cast<ColumnDecimal<DecimalType> &>(dest).insertValue(value);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -209,9 +209,13 @@ struct AggregationMethodOneNumber
|
||||
// Insert the key from the hash table into columns.
|
||||
static void insertKeyIntoColumns(const Key & key, std::vector<IColumn *> & key_columns, const Sizes & /*key_sizes*/)
|
||||
{
|
||||
static_assert(sizeof(FieldType) <= sizeof(Key));
|
||||
const auto * key_holder = reinterpret_cast<const char *>(&key);
|
||||
auto * column = static_cast<ColumnVectorHelper *>(key_columns[0]);
|
||||
column->insertRawData<sizeof(FieldType)>(key_holder);
|
||||
if constexpr (sizeof(FieldType) < sizeof(Key) && std::endian::native == std::endian::big)
|
||||
column->insertRawData<sizeof(FieldType)>(key_holder + (sizeof(Key) - sizeof(FieldType)));
|
||||
else
|
||||
column->insertRawData<sizeof(FieldType)>(key_holder);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -94,6 +94,8 @@ struct BloomFilterHash
|
||||
else if (which.isFloat32()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
|
||||
else if (which.isFloat64()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
|
||||
else if (which.isUUID()) return build_hash_column(getNumberTypeHash<UUID, UUID>(field));
|
||||
else if (which.isIPv4()) return build_hash_column(getNumberTypeHash<IPv4, IPv4>(field));
|
||||
else if (which.isIPv6()) return build_hash_column(getNumberTypeHash<IPv6, IPv6>(field));
|
||||
else if (which.isString()) return build_hash_column(getStringTypeHash(field));
|
||||
else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type));
|
||||
else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName());
|
||||
@ -156,6 +158,8 @@ struct BloomFilterHash
|
||||
else if (which.isFloat32()) getNumberTypeHash<Float32, is_first>(column, vec, pos);
|
||||
else if (which.isFloat64()) getNumberTypeHash<Float64, is_first>(column, vec, pos);
|
||||
else if (which.isUUID()) getNumberTypeHash<UUID, is_first>(column, vec, pos);
|
||||
else if (which.isIPv4()) getNumberTypeHash<IPv4, is_first>(column, vec, pos);
|
||||
else if (which.isIPv6()) getNumberTypeHash<IPv6, is_first>(column, vec, pos);
|
||||
else if (which.isString()) getStringTypeHash<is_first>(column, vec, pos);
|
||||
else if (which.isFixedString()) getStringTypeHash<is_first>(column, vec, pos);
|
||||
else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName());
|
||||
|
@ -313,14 +313,6 @@ Pipe && QueryCache::Reader::getPipe()
|
||||
return std::move(pipe);
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_)
|
||||
: max_cache_size_in_bytes(max_cache_size_in_bytes_)
|
||||
, max_cache_entries(max_cache_entries_)
|
||||
, max_cache_entry_size_in_bytes(max_cache_entry_size_in_bytes_)
|
||||
, max_cache_entry_size_in_rows(max_cache_entry_size_in_rows_)
|
||||
{
|
||||
}
|
||||
|
||||
QueryCache::Reader QueryCache::createReader(const Key & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -343,14 +335,22 @@ void QueryCache::reset()
|
||||
|
||||
size_t QueryCache::recordQueryRun(const Key & key)
|
||||
{
|
||||
static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
|
||||
|
||||
std::lock_guard times_executed_lock(mutex);
|
||||
std::lock_guard lock(mutex);
|
||||
size_t times = ++times_executed[key];
|
||||
// Regularly drop times_executed to avoid DOS-by-unlimited-growth.
|
||||
static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
|
||||
if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE)
|
||||
times_executed.clear();
|
||||
return times;
|
||||
}
|
||||
|
||||
/// Re-reads the query cache limits from `config`.
/// A key that is missing from the configuration falls back to the default passed alongside it.
void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    /// Held for the whole update: all four limits are written under `mutex`.
    std::lock_guard guard(mutex);

    /// Limits for the cache as a whole.
    max_cache_size_in_bytes = config.getUInt64("query_cache.size", 1_GiB);
    max_cache_entries = config.getUInt64("query_cache.max_entries", 1024);

    /// Limits for a single cache entry.
    max_cache_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size", 1_MiB);
    max_cache_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows", 30'000'000);
}
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Poco/Util/LayeredConfiguration.h>
|
||||
#include <Processors/Chunk.h>
|
||||
#include <QueryPipeline/Pipe.h>
|
||||
|
||||
@ -132,7 +133,7 @@ public:
|
||||
friend class QueryCache; /// for createReader()
|
||||
};
|
||||
|
||||
QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_);
|
||||
void updateConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
Reader createReader(const Key & key);
|
||||
Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime);
|
||||
@ -154,11 +155,13 @@ private:
|
||||
Cache cache TSA_GUARDED_BY(mutex);
|
||||
TimesExecuted times_executed TSA_GUARDED_BY(mutex);
|
||||
|
||||
size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// updated in each cache insert/delete
|
||||
const size_t max_cache_size_in_bytes;
|
||||
const size_t max_cache_entries;
|
||||
const size_t max_cache_entry_size_in_bytes;
|
||||
const size_t max_cache_entry_size_in_rows;
|
||||
/// Cache configuration
|
||||
size_t max_cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
|
||||
size_t max_cache_entries TSA_GUARDED_BY(mutex) = 0;
|
||||
size_t max_cache_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
|
||||
size_t max_cache_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0;
|
||||
|
||||
size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete
|
||||
|
||||
friend class StorageSystemQueryCache;
|
||||
};
|
||||
|
@ -2041,14 +2041,22 @@ void Context::dropIndexMarkCache() const
|
||||
shared->index_mark_cache->reset();
|
||||
}
|
||||
|
||||
void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records)
|
||||
void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
||||
if (shared->query_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created.");
|
||||
|
||||
shared->query_cache = std::make_shared<QueryCache>(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records);
|
||||
shared->query_cache = std::make_shared<QueryCache>();
|
||||
shared->query_cache->updateConfiguration(config);
|
||||
}
|
||||
|
||||
/// Forwards `config` to the query cache so it can refresh its settings.
/// Does nothing if the query cache has not been created.
void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
    auto context_lock = getLock();

    if (!shared->query_cache)
        return;

    shared->query_cache->updateConfiguration(config);
}
|
||||
|
||||
QueryCachePtr Context::getQueryCache() const
|
||||
|
@ -872,7 +872,8 @@ public:
|
||||
void dropMMappedFileCache() const;
|
||||
|
||||
/// Create a cache of query results for statements which run repeatedly.
|
||||
void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records);
|
||||
void setQueryCache(const Poco::Util::AbstractConfiguration & config);
|
||||
void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
|
||||
std::shared_ptr<QueryCache> getQueryCache() const;
|
||||
void dropQueryCache() const;
|
||||
|
||||
|
@ -1801,11 +1801,16 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje
|
||||
getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions));
|
||||
}
|
||||
|
||||
ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs)
|
||||
/// Builds an actions DAG over `constant_inputs` and fills it from the analyzed query
/// in "only constants" mode, without creating sets for subqueries.
ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs)
{
    constexpr bool no_makeset_for_subqueries = true;
    constexpr bool only_consts = true;

    auto const_dag = std::make_shared<ActionsDAG>(constant_inputs);
    getRootActions(query, no_makeset_for_subqueries, const_dag, only_consts);

    return const_dag;
}
|
||||
|
||||
/// Same as getConstActionsDAG(), but wraps the resulting DAG into ExpressionActions
/// configured with the settings taken from the current context.
ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAndName & constant_inputs)
{
    auto const_dag = getConstActionsDAG(constant_inputs);
    auto actions_settings = ExpressionActionsSettings::fromContext(getContext());

    return std::make_shared<ExpressionActions>(const_dag, actions_settings);
}
|
||||
|
||||
|
@ -119,8 +119,9 @@ public:
|
||||
ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true);
|
||||
ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no);
|
||||
|
||||
/// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants.
|
||||
/// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants.
|
||||
/// Does not execute subqueries.
|
||||
ActionsDAGPtr getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {});
|
||||
ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {});
|
||||
|
||||
/** Sets that require a subquery to be created.
|
||||
|
@ -70,7 +70,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names)
|
||||
FunctionNameNormalizer().visit(ast.get());
|
||||
|
||||
String name = ast->getColumnName();
|
||||
String result_name = ast->getColumnName();
|
||||
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
|
||||
|
||||
/// AST potentially could be transformed to literal during TreeRewriter analyze.
|
||||
@ -78,33 +78,37 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
|
||||
if (ASTLiteral * literal = ast->as<ASTLiteral>())
|
||||
return getFieldAndDataTypeFromLiteral(literal);
|
||||
|
||||
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();
|
||||
auto actions = ExpressionAnalyzer(ast, syntax_result, context).getConstActionsDAG();
|
||||
|
||||
/// There must be at least one column in the block so that it knows the number of rows.
|
||||
Block block_with_constants{{ ColumnConst::create(ColumnUInt8::create(1, 0), 1), std::make_shared<DataTypeUInt8>(), "_dummy" }};
|
||||
ColumnPtr result_column;
|
||||
DataTypePtr result_type;
|
||||
for (const auto & action_node : actions->getOutputs())
|
||||
{
|
||||
if ((action_node->result_name == result_name) && action_node->column)
|
||||
{
|
||||
result_column = action_node->column;
|
||||
result_type = action_node->result_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
expr_for_constant_folding->execute(block_with_constants);
|
||||
if (!result_column)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
|
||||
"is not a constant expression (result column not found): {}", result_name);
|
||||
|
||||
if (!block_with_constants || block_with_constants.rows() == 0)
|
||||
if (result_column->empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error: empty block after evaluation "
|
||||
"Logical error: empty result column after evaluation "
|
||||
"of constant expression for IN, VALUES or LIMIT or aggregate function parameter");
|
||||
|
||||
if (!block_with_constants.has(name))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
|
||||
"is not a constant expression (result column not found): {}", name);
|
||||
|
||||
const ColumnWithTypeAndName & result = block_with_constants.getByName(name);
|
||||
const IColumn & result_column = *result.column;
|
||||
|
||||
/// Expressions like rand() or now() are not constant
|
||||
if (!isColumnConst(result_column))
|
||||
if (!isColumnConst(*result_column))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Element of set in IN, VALUES or LIMIT or aggregate function parameter "
|
||||
"is not a constant expression (result column is not const): {}", name);
|
||||
"is not a constant expression (result column is not const): {}", result_name);
|
||||
|
||||
return std::make_pair(result_column[0], result.type);
|
||||
return std::make_pair((*result_column)[0], result_type);
|
||||
}
|
||||
|
||||
|
||||
|
@ -74,6 +74,11 @@ void ASTStorage::formatImpl(const FormatSettings & s, FormatState & state, Forma
|
||||
}
|
||||
}
|
||||
|
||||
bool ASTStorage::isExtendedStorageDefinition() const
|
||||
{
|
||||
return partition_by || primary_key || order_by || sample_by || settings;
|
||||
}
|
||||
|
||||
|
||||
class ASTColumnsElement : public IAST
|
||||
{
|
||||
|
@ -30,6 +30,8 @@ public:
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
bool isExtendedStorageDefinition() const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -8,7 +8,7 @@ namespace DB
|
||||
{
|
||||
|
||||
JSONColumnsBlockOutputFormat::JSONColumnsBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool validate_utf8, size_t indent_)
|
||||
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_, validate_utf8), indent(indent_)
|
||||
: JSONColumnsBlockOutputFormatBase(out_, header_, format_settings_, validate_utf8), indent(indent_), header(header_)
|
||||
{
|
||||
names = JSONUtils::makeNamesValidJSONStrings(header_.getNames(), format_settings, validate_utf8);
|
||||
}
|
||||
@ -25,6 +25,18 @@ void JSONColumnsBlockOutputFormat::writeColumnStart(size_t column_index)
|
||||
|
||||
void JSONColumnsBlockOutputFormat::writeChunkEnd()
|
||||
{
|
||||
/// Write empty chunk
|
||||
if (!written_rows)
|
||||
{
|
||||
const auto & columns = header.getColumns();
|
||||
for (size_t i = 0; i != columns.size(); ++i)
|
||||
{
|
||||
writeColumnStart(i);
|
||||
writeColumn(*columns[i], *serializations[i]);
|
||||
writeColumnEnd(i == columns.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
JSONUtils::writeObjectEnd(*ostr, indent);
|
||||
writeChar('\n', *ostr);
|
||||
}
|
||||
|
@ -27,6 +27,8 @@ protected:
|
||||
|
||||
Names names;
|
||||
size_t indent;
|
||||
|
||||
Block header;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -42,6 +42,7 @@ void JSONColumnsBlockOutputFormatBase::writeChunk(Chunk & chunk)
|
||||
writeColumn(*columns[i], *serializations[i]);
|
||||
writeColumnEnd(i == columns.size() - 1);
|
||||
}
|
||||
written_rows += chunk.getNumRows();
|
||||
writeChunkEnd();
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,8 @@ protected:
|
||||
const Serializations serializations;
|
||||
|
||||
Chunk mono_chunk;
|
||||
|
||||
size_t written_rows = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -124,6 +124,8 @@ size_t IntersectOrExceptTransform::buildFilter(
|
||||
|
||||
void IntersectOrExceptTransform::accumulate(Chunk chunk)
|
||||
{
|
||||
convertToFullIfSparse(chunk);
|
||||
|
||||
auto num_rows = chunk.getNumRows();
|
||||
auto columns = chunk.detachColumns();
|
||||
|
||||
@ -160,6 +162,8 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk)
|
||||
|
||||
void IntersectOrExceptTransform::filter(Chunk & chunk)
|
||||
{
|
||||
convertToFullIfSparse(chunk);
|
||||
|
||||
auto num_rows = chunk.getNumRows();
|
||||
auto columns = chunk.detachColumns();
|
||||
|
||||
|
@ -700,12 +700,14 @@ struct StorageDistributedDirectoryMonitor::BatchHeader
|
||||
|
||||
struct StorageDistributedDirectoryMonitor::Batch
|
||||
{
|
||||
/// File indexes for this batch.
|
||||
std::vector<UInt64> file_indices;
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes = 0;
|
||||
bool recovered = false;
|
||||
|
||||
StorageDistributedDirectoryMonitor & parent;
|
||||
/// Information about all available indexes (not only for the current batch).
|
||||
const std::map<UInt64, String> & file_index_to_path;
|
||||
|
||||
bool split_batch_on_failure = true;
|
||||
@ -795,17 +797,22 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
else
|
||||
{
|
||||
std::vector<std::string> files;
|
||||
for (const auto && file_info : file_index_to_path | boost::adaptors::indexed())
|
||||
for (auto file_index_info : file_indices | boost::adaptors::indexed())
|
||||
{
|
||||
if (file_info.index() > 8)
|
||||
if (file_index_info.index() > 8)
|
||||
{
|
||||
files.push_back("...");
|
||||
break;
|
||||
}
|
||||
|
||||
files.push_back(file_info.value().second);
|
||||
auto file_index = file_index_info.value();
|
||||
auto file_path = file_index_to_path.find(file_index);
|
||||
if (file_path != file_index_to_path.end())
|
||||
files.push_back(file_path->second);
|
||||
else
|
||||
files.push_back(fmt::format("#{}.bin (deleted)", file_index));
|
||||
}
|
||||
e.addMessage(fmt::format("While sending batch, nums: {}, files: {}", file_index_to_path.size(), fmt::join(files, "\n")));
|
||||
e.addMessage(fmt::format("While sending batch, size: {}, files: {}", file_indices.size(), fmt::join(files, "\n")));
|
||||
|
||||
throw;
|
||||
}
|
||||
|
@ -706,8 +706,12 @@ Block KeyCondition::getBlockWithConstants(
|
||||
|
||||
if (syntax_analyzer_result)
|
||||
{
|
||||
const auto expr_for_constant_folding = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActions();
|
||||
expr_for_constant_folding->execute(result);
|
||||
auto actions = ExpressionAnalyzer(query, syntax_analyzer_result, context).getConstActionsDAG();
|
||||
for (const auto & action_node : actions->getOutputs())
|
||||
{
|
||||
if (action_node->column)
|
||||
result.insert(ColumnWithTypeAndName{action_node->column, action_node->result_type, action_node->result_name});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -379,11 +379,6 @@ namespace
|
||||
|
||||
bool columnExists(const String & name) const { return block.has(name); }
|
||||
|
||||
void insertStringColumn(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeString>(), name});
|
||||
}
|
||||
|
||||
void insertUInt8Column(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeUInt8>(), name});
|
||||
@ -399,6 +394,11 @@ namespace
|
||||
block.insert({column, std::make_shared<DataTypeUUID>(), name});
|
||||
}
|
||||
|
||||
void insertLowCardinalityColumn(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), name});
|
||||
}
|
||||
|
||||
void insertPartitionValueColumn(
|
||||
size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name)
|
||||
{
|
||||
@ -483,11 +483,13 @@ static void injectPartConstVirtualColumns(
|
||||
{
|
||||
ColumnPtr column;
|
||||
if (rows)
|
||||
column = DataTypeString().createColumnConst(rows, part->name)->convertToFullColumnIfConst();
|
||||
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
|
||||
.createColumnConst(rows, part->name)
|
||||
->convertToFullColumnIfConst();
|
||||
else
|
||||
column = DataTypeString().createColumn();
|
||||
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
|
||||
|
||||
inserter.insertStringColumn(column, virtual_column_name);
|
||||
inserter.insertLowCardinalityColumn(column, virtual_column_name);
|
||||
}
|
||||
else if (virtual_column_name == "_part_index")
|
||||
{
|
||||
@ -513,11 +515,13 @@ static void injectPartConstVirtualColumns(
|
||||
{
|
||||
ColumnPtr column;
|
||||
if (rows)
|
||||
column = DataTypeString().createColumnConst(rows, part->info.partition_id)->convertToFullColumnIfConst();
|
||||
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
|
||||
.createColumnConst(rows, part->info.partition_id)
|
||||
->convertToFullColumnIfConst();
|
||||
else
|
||||
column = DataTypeString().createColumn();
|
||||
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
|
||||
|
||||
inserter.insertStringColumn(column, virtual_column_name);
|
||||
inserter.insertLowCardinalityColumn(column, virtual_column_name);
|
||||
}
|
||||
else if (virtual_column_name == "_partition_value")
|
||||
{
|
||||
|
@ -840,8 +840,14 @@ Block MergeTreeData::getSampleBlockWithVirtualColumns() const
|
||||
{
|
||||
DataTypePtr partition_value_type = getPartitionValueType();
|
||||
return {
|
||||
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_part"),
|
||||
ColumnWithTypeAndName(ColumnString::create(), std::make_shared<DataTypeString>(), "_partition_id"),
|
||||
ColumnWithTypeAndName(
|
||||
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_part"),
|
||||
ColumnWithTypeAndName(
|
||||
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_partition_id"),
|
||||
ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared<DataTypeUUID>(), "_part_uuid"),
|
||||
ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")};
|
||||
}
|
||||
@ -1889,7 +1895,9 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif
|
||||
{
|
||||
if (temporary_parts.contains(basename))
|
||||
{
|
||||
LOG_WARNING(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path);
|
||||
/// Actually we don't rely on temporary_directories_lifetime when removing old temporary directories,
|
||||
/// it's just an extra level of protection just in case we have a bug.
|
||||
LOG_INFO(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
@ -7576,7 +7584,19 @@ MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog()
|
||||
if (!write_ahead_log)
|
||||
{
|
||||
auto reservation = reserveSpace(getSettings()->write_ahead_log_max_bytes);
|
||||
write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, reservation->getDisk());
|
||||
for (const auto & disk: reservation->getDisks())
|
||||
{
|
||||
if (!disk->isRemote())
|
||||
{
|
||||
write_ahead_log = std::make_shared<MergeTreeWriteAheadLog>(*this, disk);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!write_ahead_log)
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Can't store write ahead log in remote disk. It makes no sense.");
|
||||
}
|
||||
|
||||
return write_ahead_log;
|
||||
@ -7585,10 +7605,10 @@ MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog()
|
||||
NamesAndTypesList MergeTreeData::getVirtuals() const
|
||||
{
|
||||
return NamesAndTypesList{
|
||||
NameAndTypePair("_part", std::make_shared<DataTypeString>()),
|
||||
NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
|
||||
NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
|
||||
NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
|
||||
NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
|
||||
NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
|
||||
NameAndTypePair("_partition_value", getPartitionValueType()),
|
||||
NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
|
||||
NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
|
||||
|
@ -88,7 +88,8 @@ static void assertIndexColumnsType(const Block & header)
|
||||
WhichDataType which(actual_type);
|
||||
|
||||
if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
|
||||
!which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID())
|
||||
!which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID() &&
|
||||
!which.isIPv4() && !which.isIPv6())
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type {} of bloom filter index.", type->getName());
|
||||
}
|
||||
}
|
||||
|
@ -147,9 +147,9 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
* - Additional MergeTreeSettings in the SETTINGS clause;
|
||||
*/
|
||||
|
||||
bool is_extended_storage_def = args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by
|
||||
|| args.storage_def->sample_by || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty())
|
||||
|| (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty()) || args.storage_def->settings;
|
||||
bool is_extended_storage_def = args.storage_def->isExtendedStorageDefinition()
|
||||
|| (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty())
|
||||
|| (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty());
|
||||
|
||||
String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree"));
|
||||
|
||||
|
@ -268,11 +268,11 @@ NamesAndTypesList StorageDistributed::getVirtuals() const
|
||||
/// NOTE This is weird. Most of these virtual columns are part of MergeTree
|
||||
/// tables info. But Distributed is general-purpose engine.
|
||||
return NamesAndTypesList{
|
||||
NameAndTypePair("_table", std::make_shared<DataTypeString>()),
|
||||
NameAndTypePair("_part", std::make_shared<DataTypeString>()),
|
||||
NameAndTypePair("_table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
|
||||
NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
|
||||
NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
|
||||
NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
|
||||
NameAndTypePair("_partition_id", std::make_shared<DataTypeString>()),
|
||||
NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
|
||||
NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
|
||||
NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
|
||||
NameAndTypePair("_row_exists", std::make_shared<DataTypeUInt8>()),
|
||||
|
@ -664,7 +664,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = "_database";
|
||||
column.type = std::make_shared<DataTypeString>();
|
||||
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
|
||||
column.column = column.type->createColumnConst(0, Field(database_name));
|
||||
|
||||
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
|
||||
@ -682,7 +682,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = "_table";
|
||||
column.type = std::make_shared<DataTypeString>();
|
||||
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
|
||||
column.column = column.type->createColumnConst(0, Field(table_name));
|
||||
|
||||
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
|
||||
@ -980,7 +980,9 @@ void registerStorageMerge(StorageFactory & factory)
|
||||
|
||||
NamesAndTypesList StorageMerge::getVirtuals() const
|
||||
{
|
||||
NamesAndTypesList virtuals{{"_database", std::make_shared<DataTypeString>()}, {"_table", std::make_shared<DataTypeString>()}};
|
||||
NamesAndTypesList virtuals{
|
||||
{"_database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
|
||||
auto first_table = getFirstTable([](auto && table) { return table; });
|
||||
if (first_table)
|
||||
|
@ -1601,37 +1601,39 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa
|
||||
|
||||
void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr query_context, TableExclusiveLockHolder &)
|
||||
{
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
waitForOutdatedPartsToBeLoaded();
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
waitForOutdatedPartsToBeLoaded();
|
||||
|
||||
auto parts = getVisibleDataPartsVector(query_context);
|
||||
Stopwatch watch;
|
||||
|
||||
auto future_parts = initCoverageWithNewEmptyParts(parts);
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
|
||||
future_parts.size(), parts.size(),
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
|
||||
transaction.getTID());
|
||||
auto parts = getVisibleDataPartsVector(query_context);
|
||||
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
auto future_parts = initCoverageWithNewEmptyParts(parts);
|
||||
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
|
||||
future_parts.size(), parts.size(),
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
|
||||
transaction.getTID());
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
|
||||
LOG_INFO(log, "Truncated table with {} parts by replacing them with new empty {} parts. With txn {}",
|
||||
parts.size(), future_parts.size(),
|
||||
transaction.getTID());
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
|
||||
LOG_INFO(log, "Truncated table with {} parts by replacing them with new empty {} parts. With txn {}",
|
||||
parts.size(), future_parts.size(),
|
||||
transaction.getTID());
|
||||
}
|
||||
}
|
||||
|
||||
/// Old parts are needed to be destroyed before clearing them from filesystem.
|
||||
@ -1642,48 +1644,50 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
|
||||
|
||||
void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPtr query_context)
|
||||
{
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
/// It's important to create it outside of lock scope because
|
||||
/// otherwise it can lock parts in destructor and deadlock is possible.
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
|
||||
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
|
||||
if (!part)
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name);
|
||||
Stopwatch watch;
|
||||
|
||||
if (detach)
|
||||
/// It's important to create it outside of lock scope because
|
||||
/// otherwise it can lock parts in destructor and deadlock is possible.
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
|
||||
part->makeCloneInDetached("", metadata_snapshot);
|
||||
}
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
|
||||
{
|
||||
auto future_parts = initCoverageWithNewEmptyParts({part});
|
||||
auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active});
|
||||
if (!part)
|
||||
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name);
|
||||
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} part. With txn {}",
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "),
|
||||
transaction.getTID());
|
||||
if (detach)
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
|
||||
part->makeCloneInDetached("", metadata_snapshot);
|
||||
}
|
||||
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
{
|
||||
auto future_parts = initCoverageWithNewEmptyParts({part});
|
||||
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} part. With txn {}",
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "),
|
||||
transaction.getTID());
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
|
||||
const auto * op = detach ? "Detached" : "Dropped";
|
||||
LOG_INFO(log, "{} {} part by replacing it with new empty {} part. With txn {}",
|
||||
op, part->name, future_parts[0].part_name,
|
||||
transaction.getTID());
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
|
||||
const auto * op = detach ? "Detached" : "Dropped";
|
||||
LOG_INFO(log, "{} {} part by replacing it with new empty {} part. With txn {}",
|
||||
op, part->name, future_parts[0].part_name,
|
||||
transaction.getTID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1695,58 +1699,60 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
|
||||
|
||||
void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, ContextPtr query_context)
|
||||
{
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
/// It's important to create it outside of lock scope because
|
||||
/// otherwise it can lock parts in destructor and deadlock is possible.
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
const auto * partition_ast = partition->as<ASTPartition>();
|
||||
|
||||
DataPartsVector parts;
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
auto merge_blocker = stopMergesAndWait();
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
/// It's important to create it outside of lock scope because
|
||||
/// otherwise it can lock parts in destructor and deadlock is possible.
|
||||
auto txn = query_context->getCurrentTransaction();
|
||||
MergeTreeData::Transaction transaction(*this, txn.get());
|
||||
{
|
||||
if (partition_ast && partition_ast->all)
|
||||
parts = getVisibleDataPartsVector(query_context);
|
||||
else
|
||||
auto operation_data_parts_lock = lockOperationsWithParts();
|
||||
|
||||
DataPartsVector parts;
|
||||
{
|
||||
String partition_id = getPartitionIDFromQuery(partition, query_context);
|
||||
parts = getVisibleDataPartsVectorInPartition(query_context, partition_id);
|
||||
if (partition_ast && partition_ast->all)
|
||||
parts = getVisibleDataPartsVector(query_context);
|
||||
else
|
||||
{
|
||||
String partition_id = getPartitionIDFromQuery(partition, query_context);
|
||||
parts = getVisibleDataPartsVectorInPartition(query_context, partition_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (detach)
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
|
||||
part->makeCloneInDetached("", metadata_snapshot);
|
||||
}
|
||||
|
||||
auto future_parts = initCoverageWithNewEmptyParts(parts);
|
||||
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
|
||||
future_parts.size(), parts.size(),
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
|
||||
transaction.getTID());
|
||||
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
|
||||
const auto * op = detach ? "Detached" : "Dropped";
|
||||
LOG_INFO(log, "{} partition with {} parts by replacing them with new empty {} parts. With txn {}",
|
||||
op, parts.size(), future_parts.size(),
|
||||
transaction.getTID());
|
||||
}
|
||||
|
||||
if (detach)
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
auto metadata_snapshot = getInMemoryMetadataPtr();
|
||||
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
|
||||
part->makeCloneInDetached("", metadata_snapshot);
|
||||
}
|
||||
|
||||
auto future_parts = initCoverageWithNewEmptyParts(parts);
|
||||
|
||||
LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}",
|
||||
future_parts.size(), parts.size(),
|
||||
fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "),
|
||||
transaction.getTID());
|
||||
|
||||
captureTmpDirectoryHolders(*this, future_parts);
|
||||
|
||||
auto new_data_parts = createEmptyDataParts(*this, future_parts, txn);
|
||||
renameAndCommitEmptyParts(new_data_parts, transaction);
|
||||
|
||||
PartLog::addNewParts(query_context, new_data_parts, watch.elapsed());
|
||||
|
||||
const auto * op = detach ? "Detached" : "Dropped";
|
||||
LOG_INFO(log, "{} partition with {} parts by replacing them with new empty {} parts. With txn {}",
|
||||
op, parts.size(), future_parts.size(),
|
||||
transaction.getTID());
|
||||
}
|
||||
|
||||
/// Old parts are needed to be destroyed before clearing them from filesystem.
|
||||
|
@ -107,6 +107,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
class IOutputFormat;
|
||||
@ -260,6 +261,9 @@ private:
|
||||
outcome_future = listObjectsAsync();
|
||||
}
|
||||
|
||||
if (request_settings.throw_on_zero_files_match && result_batch.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix());
|
||||
|
||||
KeysWithInfo temp_buffer;
|
||||
temp_buffer.reserve(result_batch.size());
|
||||
|
||||
|
@ -167,6 +167,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection)
|
||||
max_connections = collection.getOrDefault<UInt64>("max_connections", max_connections);
|
||||
list_object_keys_size = collection.getOrDefault<UInt64>("list_object_keys_size", list_object_keys_size);
|
||||
allow_head_object_request = collection.getOrDefault<bool>("allow_head_object_request", allow_head_object_request);
|
||||
throw_on_zero_files_match = collection.getOrDefault<bool>("throw_on_zero_files_match", throw_on_zero_files_match);
|
||||
}
|
||||
|
||||
S3Settings::RequestSettings::RequestSettings(
|
||||
@ -182,6 +183,7 @@ S3Settings::RequestSettings::RequestSettings(
|
||||
check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload);
|
||||
list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size);
|
||||
allow_head_object_request = config.getBool(key + "allow_head_object_request", allow_head_object_request);
|
||||
throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match);
|
||||
|
||||
/// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload,
|
||||
/// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used.
|
||||
@ -231,6 +233,9 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin
|
||||
if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps)
|
||||
put_request_throttler = std::make_shared<Throttler>(
|
||||
settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps);
|
||||
|
||||
if (!if_changed || settings.s3_throw_on_zero_files_match)
|
||||
throw_on_zero_files_match = settings.s3_throw_on_zero_files_match;
|
||||
}
|
||||
|
||||
void S3Settings::RequestSettings::updateFromSettings(const Settings & settings)
|
||||
|
@ -77,6 +77,8 @@ struct S3Settings
|
||||
/// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.
|
||||
bool allow_head_object_request = true;
|
||||
|
||||
bool throw_on_zero_files_match = false;
|
||||
|
||||
const PartUploadSettings & getUploadSettings() const { return upload_settings; }
|
||||
|
||||
RequestSettings() = default;
|
||||
|
@ -51,6 +51,7 @@ const char * auto_config_build[]
|
||||
"USE_ROCKSDB", "@USE_ROCKSDB@",
|
||||
"USE_NURAFT", "@USE_NURAFT@",
|
||||
"USE_NLP", "@USE_NLP@",
|
||||
"USE_LIBURING", "@USE_LIBURING@",
|
||||
"USE_SQLITE", "@USE_SQLITE@",
|
||||
"USE_LIBPQXX", "@USE_LIBPQXX@",
|
||||
"USE_AZURE_BLOB_STORAGE", "@USE_AZURE_BLOB_STORAGE@",
|
||||
|
@ -120,7 +120,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr c
|
||||
res_columns[i++]->insert(process.client_info.quota_key);
|
||||
res_columns[i++]->insert(process.client_info.distributed_depth);
|
||||
|
||||
res_columns[i++]->insert(static_cast<double>(process.elapsed_microseconds) / 100000.0);
|
||||
res_columns[i++]->insert(static_cast<double>(process.elapsed_microseconds) / 1'000'000.0);
|
||||
res_columns[i++]->insert(process.is_cancelled);
|
||||
res_columns[i++]->insert(process.is_all_data_sent);
|
||||
res_columns[i++]->insert(process.read_rows);
|
||||
|
@ -129,6 +129,9 @@ if (TARGET ch_contrib::parquet)
|
||||
set(USE_ARROW 1)
|
||||
set(USE_ORC 1)
|
||||
endif()
|
||||
if (TARGET ch_contrib::liburing)
|
||||
set(USE_LIBURING 1)
|
||||
endif ()
|
||||
if (TARGET ch_contrib::protobuf)
|
||||
set(USE_PROTOBUF 1)
|
||||
endif()
|
||||
|
@ -5,7 +5,8 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Any, List, Optional
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, List, Optional
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
@ -56,21 +57,29 @@ def read_build_urls(build_name: str, reports_path: str) -> List[str]:
|
||||
return []
|
||||
|
||||
|
||||
def download_build_with_progress(url, path):
|
||||
def download_build_with_progress(url: str, path: Path) -> None:
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(DOWNLOAD_RETRIES_COUNT):
|
||||
try:
|
||||
response = get_with_retries(url, retries=1, stream=True)
|
||||
total_length = int(response.headers.get("content-length", 0))
|
||||
if path.is_file() and total_length and path.stat().st_size == total_length:
|
||||
logging.info(
|
||||
"The file %s already exists and have a proper size %s",
|
||||
path,
|
||||
total_length,
|
||||
)
|
||||
return
|
||||
|
||||
with open(path, "wb") as f:
|
||||
response = get_with_retries(url, retries=1, stream=True)
|
||||
total_length = response.headers.get("content-length")
|
||||
if total_length is None or int(total_length) == 0:
|
||||
if total_length == 0:
|
||||
logging.info(
|
||||
"No content-length, will download file without progress"
|
||||
)
|
||||
f.write(response.content)
|
||||
else:
|
||||
dl = 0
|
||||
total_length = int(total_length)
|
||||
|
||||
logging.info("Content length is %ld bytes", total_length)
|
||||
for data in response.iter_content(chunk_size=4096):
|
||||
dl += len(data)
|
||||
@ -99,12 +108,14 @@ def download_build_with_progress(url, path):
|
||||
logging.info("Downloading finished")
|
||||
|
||||
|
||||
def download_builds(result_path, build_urls, filter_fn):
|
||||
def download_builds(
|
||||
result_path: str, build_urls: List[str], filter_fn: Callable[[str], bool]
|
||||
) -> None:
|
||||
for url in build_urls:
|
||||
if filter_fn(url):
|
||||
fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
|
||||
logging.info("Will download %s to %s", fname, result_path)
|
||||
download_build_with_progress(url, os.path.join(result_path, fname))
|
||||
download_build_with_progress(url, Path(result_path) / fname)
|
||||
|
||||
|
||||
def download_builds_filter(
|
||||
|
@ -182,6 +182,12 @@ CI_CONFIG = {
|
||||
"tests_config": {
|
||||
# required_build - build name for artifacts
|
||||
# force_tests - force success status for tests
|
||||
"Install packages (amd64)": {
|
||||
"required_build": "package_release",
|
||||
},
|
||||
"Install packages (arm64)": {
|
||||
"required_build": "package_aarch64",
|
||||
},
|
||||
"Stateful tests (asan)": {
|
||||
"required_build": "package_asan",
|
||||
},
|
||||
|
@ -6,6 +6,7 @@ This file is needed to avoid cicle import build_download_helper.py <=> env_helpe
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from build_download_helper import download_build_with_progress
|
||||
from ci_config import CI_CONFIG, BuildConfig
|
||||
@ -57,14 +58,15 @@ def parse_args() -> argparse.Namespace:
|
||||
def main():
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
args = parse_args()
|
||||
os.makedirs(TEMP_PATH, exist_ok=True)
|
||||
temp_path = Path(TEMP_PATH)
|
||||
temp_path.mkdir(parents=True, exist_ok=True)
|
||||
for build in args.build_names:
|
||||
# check if it's in CI_CONFIG
|
||||
config = CI_CONFIG["build_config"][build] # type: BuildConfig
|
||||
if args.rename:
|
||||
path = os.path.join(TEMP_PATH, f"clickhouse-{config['static_binary_name']}")
|
||||
path = temp_path / f"clickhouse-{config['static_binary_name']}"
|
||||
else:
|
||||
path = os.path.join(TEMP_PATH, "clickhouse")
|
||||
path = temp_path / "clickhouse"
|
||||
|
||||
url = S3_ARTIFACT_DOWNLOAD_TEMPLATE.format(
|
||||
pr_or_release=f"{args.version.major}.{args.version.minor}",
|
||||
|
315
tests/ci/install_check.py
Normal file
315
tests/ci/install_check.py
Normal file
@ -0,0 +1,315 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from github import Github
|
||||
|
||||
from build_download_helper import download_builds_filter
|
||||
from clickhouse_helper import (
|
||||
ClickHouseHelper,
|
||||
mark_flaky_tests,
|
||||
prepare_tests_results_for_clickhouse,
|
||||
)
|
||||
from commit_status_helper import post_commit_status, update_mergeable_check
|
||||
from docker_pull_helper import get_image_with_version, DockerImage
|
||||
from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
|
||||
from get_robot_token import get_best_robot_token
|
||||
from pr_info import PRInfo
|
||||
from report import TestResults, TestResult
|
||||
from rerun_helper import RerunHelper
|
||||
from s3_helper import S3Helper
|
||||
from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
from upload_result_helper import upload_results
|
||||
|
||||
|
||||
# Docker image names. Presumably the rpm-based and deb-based test environments;
# their use is not visible in this part of the file -- confirm in main().
RPM_IMAGE = "clickhouse/install-rpm-test"
DEB_IMAGE = "clickhouse/install-deb-test"
# Host directory that receives the generated scripts and per-test logs.
# test_install() mounts it into every container as /packages.
TEMP_PATH = Path(TEMP)
# Status strings stored in each TestResult produced by test_install().
SUCCESS = "success"
FAILURE = "failure"
|
||||
|
||||
|
||||
def prepare_test_scripts():
    """Write the helper shell scripts used by the install scenarios.

    Three files are (re)created inside TEMP_PATH: ``server_test.sh``,
    ``keeper_test.sh`` and ``binary_test.sh``. The install scenarios call
    them as ``/packages/<name>``.
    """
    # Common tail of both keeper checks: wait until port 9181 accepts
    # connections, then send `mntr` until the answer contains `zk_version`.
    wait_for_keeper = r"""for i in {1..20}; do
echo wait for clickhouse-keeper to being up
> /dev/tcp/127.0.0.1/9181 2>/dev/null && break || sleep 1
done
for i in {1..5}; do
echo wait for clickhouse-keeper to answer on mntr request
exec 13<>/dev/tcp/127.0.0.1/9181
echo mntr >&13
cat <&13 | grep zk_version && break || sleep 1
exec 13>&-
done
exec 13>&-"""

    scripts = {
        "server_test.sh": r"""#!/bin/bash
systemctl start clickhouse-server
clickhouse-client -q 'SELECT version()'""",
        "keeper_test.sh": r"""#!/bin/bash
systemctl start clickhouse-keeper
"""
        + wait_for_keeper,
        "binary_test.sh": r"""#!/bin/bash
chmod +x /packages/clickhouse
/packages/clickhouse install
clickhouse-server start --daemon
for i in {1..5}; do
clickhouse-client -q 'SELECT version()' && break || sleep 1
done
clickhouse-keeper start --daemon
"""
        + wait_for_keeper,
    }

    # Dict order is insertion order, so files are written server, keeper, binary.
    for file_name, content in scripts.items():
        (TEMP_PATH / file_name).write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def test_install_deb(image: DockerImage) -> TestResults:
    """Run the deb scenarios (server, keeper, bare binary) inside `image`.

    Each scenario is a shell script handed to test_install(); the returned
    list holds one TestResult per scenario.
    """
    server_script = r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-{server,client,common}*deb
bash -ex /packages/server_test.sh"""
    keeper_script = r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-keeper*deb
bash -ex /packages/keeper_test.sh"""
    binary_script = r"bash -ex /packages/binary_test.sh"

    scenarios = {
        "Install server deb": server_script,
        "Install keeper deb": keeper_script,
        "Install clickhouse binary in deb": binary_script,
    }
    return test_install(image, scenarios)
|
||||
|
||||
|
||||
def test_install_rpm(image: DockerImage) -> TestResults:
    """Run the rpm scenarios (server, keeper, bare binary) inside `image`.

    Each scenario is a shell script handed to test_install(); the returned
    list holds one TestResult per scenario.
    """
    # FIXME: I couldn't find why Type=notify is broken in centos:8,
    # systemd just ignores the watchdog completely. Hence the server scenario
    # writes CLICKHOUSE_WATCHDOG_ENABLE=0 before starting the service.
    server_script = r"""#!/bin/bash -ex
yum localinstall --disablerepo=* -y /packages/clickhouse-{server,client,common}*rpm
echo CLICKHOUSE_WATCHDOG_ENABLE=0 > /etc/default/clickhouse-server
bash -ex /packages/server_test.sh"""
    keeper_script = r"""#!/bin/bash -ex
yum localinstall --disablerepo=* -y /packages/clickhouse-keeper*rpm
bash -ex /packages/keeper_test.sh"""
    binary_script = r"bash -ex /packages/binary_test.sh"

    scenarios = {
        "Install server rpm": server_script,
        "Install keeper rpm": keeper_script,
        "Install clickhouse binary in rpm": binary_script,
    }
    return test_install(image, scenarios)
|
||||
|
||||
|
||||
def test_install_tgz(image: DockerImage) -> TestResults:
    """Run the tgz scenarios (server and keeper) inside `image`.

    The scenario names include `image.name`. Each script unpacks the
    matching tarballs and runs their `install/doinst.sh`, passing
    `configure` only when /etc/debian_version exists.
    """
    # FIXME: I couldn't find why Type=notify is broken in centos:8,
    # systemd just ignores the watchdog completely. Hence the server scenario
    # disables the watchdog when /etc/yum.conf is present.
    server_script = r"""#!/bin/bash -ex
[ -f /etc/debian_version ] && CONFIGURE=configure || CONFIGURE=
for pkg in /packages/clickhouse-{common,client,server}*tgz; do
package=${pkg%-*}
package=${package##*/}
tar xf "$pkg"
"/$package/install/doinst.sh" $CONFIGURE
done
[ -f /etc/yum.conf ] && echo CLICKHOUSE_WATCHDOG_ENABLE=0 > /etc/default/clickhouse-server
bash -ex /packages/server_test.sh"""
    keeper_script = r"""#!/bin/bash -ex
[ -f /etc/debian_version ] && CONFIGURE=configure || CONFIGURE=
for pkg in /packages/clickhouse-keeper*tgz; do
package=${pkg%-*}
package=${package##*/}
tar xf "$pkg"
"/$package/install/doinst.sh" $CONFIGURE
done
bash -ex /packages/keeper_test.sh"""

    scenarios = {
        f"Install server tgz in {image.name}": server_script,
        f"Install keeper tgz in {image.name}": keeper_script,
    }
    return test_install(image, scenarios)
|
||||
|
||||
|
||||
def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults:
    """Run each install scenario in its own freshly started container.

    Args:
        image: docker image to start; it is interpolated into `docker run`.
        tests: mapping of scenario name -> shell script text. The script is
            saved as TEMP_PATH/install.sh and run with `bash -ex` through
            `docker exec`.

    Returns:
        One TestResult per scenario, in the iteration order of `tests`, with
        status SUCCESS when the script exits with code 0 and FAILURE
        otherwise. The scenario log is TEMP_PATH/<normalized name>.log.

    Raises:
        subprocess.CalledProcessError: if `docker run` or `docker kill` fails.
    """
    test_results = []  # type: TestResults
    for name, command in tests.items():
        stopwatch = Stopwatch()
        # Scenario name doubles as the log file name: make it path-friendly.
        container_name = name.lower().replace(" ", "_").replace("/", "_")
        log_file = TEMP_PATH / f"{container_name}.log"

        # Write the script before the container exists, so a failed write
        # cannot leave a detached container behind. The encoding is explicit
        # to match prepare_test_scripts() and not depend on the locale.
        (TEMP_PATH / "install.sh").write_text(command, encoding="utf-8")

        run_command = (
            f"docker run --rm --privileged --detach --cap-add=SYS_PTRACE "
            f"--volume={TEMP_PATH}:/packages {image}"
        )
        logging.info("Running docker container: `%s`", run_command)
        container_id = subprocess.check_output(
            run_command, shell=True, encoding="utf-8"
        ).strip()

        install_command = f"docker exec {container_id} bash -ex /packages/install.sh"
        try:
            with TeePopen(install_command, log_file) as process:
                retcode = process.wait()
        finally:
            # The container runs detached: always kill it, even when the
            # exec step raised, otherwise it keeps running.
            subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True)

        status = SUCCESS if retcode == 0 else FAILURE
        test_results.append(
            TestResult(name, status, stopwatch.duration_seconds, [log_file])
        )

    return test_results
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line of the install check.

    Besides the positional ``check_name`` there are four boolean switches
    (``download``, ``deb``, ``rpm``, ``tgz``), all enabled by default. Each one
    is declared twice: a hidden ``--<name>`` option that carries the default
    ``True`` and a documented ``--no-<name>`` flag that stores ``False`` into
    the same destination.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    parser = argparse.ArgumentParser(
        description="The script to check if the packages are able to install",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "check_name",
        help="check name, used to download the packages",
    )

    # (destination, help text of the corresponding --no-* flag)
    switches = (
        ("download", "if set, the packages won't be downloaded, useful for debug"),
        ("deb", "if set, the deb packages won't be checked"),
        ("rpm", "if set, the rpm packages won't be checked"),
        ("tgz", "if set, the tgz packages won't be checked"),
    )
    for dest, disable_help in switches:
        # Hidden option: exists only to provide the default value True
        parser.add_argument(f"--{dest}", default=True, help=argparse.SUPPRESS)
        # SUPPRESS as default keeps this flag from overriding the True above
        parser.add_argument(
            f"--no-{dest}",
            dest=dest,
            action="store_false",
            default=argparse.SUPPRESS,
            help=disable_help,
        )

    return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Entry point of the package installation check.

    Steps, in order:
      * parse arguments and create ``TEMP_PATH``;
      * in CI, register the mergeable-check update and exit early when the
        check is already finished according to the GitHub status;
      * resolve the docker images, prepare the test scripts and (unless
        disabled) download the deb/rpm/tgz packages and the binary;
      * run the enabled deb, rpm and tgz install tests;
      * upload the report and, in CI only, post the commit status and insert
        the results into the ``checks`` table.

    Exits with code 1 in CI when at least one install test failed.
    """
    logging.basicConfig(level=logging.INFO)

    stopwatch = Stopwatch()

    args = parse_args()

    TEMP_PATH.mkdir(parents=True, exist_ok=True)

    pr_info = PRInfo()

    if CI:
        gh = Github(get_best_robot_token(), per_page=100)
        atexit.register(update_mergeable_check, gh, pr_info, args.check_name)

        rerun_helper = RerunHelper(gh, pr_info, args.check_name)
        if rerun_helper.is_already_finished_by_status():
            logging.info(
                "Check is already finished according to github status, exiting"
            )
            sys.exit(0)

    docker_images = {
        name: get_image_with_version(REPORTS_PATH, name)
        for name in (RPM_IMAGE, DEB_IMAGE)
    }
    prepare_test_scripts()

    if args.download:

        def filter_artifacts(path: str) -> bool:
            # Only the packages and the standalone binary are needed
            return (
                path.endswith(".deb")
                or path.endswith(".rpm")
                or path.endswith(".tgz")
                or path.endswith("/clickhouse")
            )

        download_builds_filter(
            args.check_name, REPORTS_PATH, TEMP_PATH, filter_artifacts
        )

    test_results = []  # type: TestResults
    if args.deb:
        test_results.extend(test_install_deb(docker_images[DEB_IMAGE]))
    if args.rpm:
        test_results.extend(test_install_rpm(docker_images[RPM_IMAGE]))
    if args.tgz:
        # tgz archives are checked in both images
        test_results.extend(test_install_tgz(docker_images[DEB_IMAGE]))
        test_results.extend(test_install_tgz(docker_images[RPM_IMAGE]))

    state = SUCCESS
    description = "Packages installed successfully"
    # Name only the scenarios that actually failed, not every executed one
    failed_names = [
        result.name for result in test_results if result.status == FAILURE
    ]
    if failed_names:
        state = FAILURE
        description = "Failed to install packages: " + ", ".join(failed_names)

    s3_helper = S3Helper()

    report_url = upload_results(
        s3_helper,
        pr_info.number,
        pr_info.sha,
        test_results,
        [],
        args.check_name,
    )
    print(f"::notice ::Report url: {report_url}")
    if not CI:
        # Outside of CI there is no `gh` object, so nothing more is reported
        return

    ch_helper = ClickHouseHelper()
    mark_flaky_tests(ch_helper, args.check_name, test_results)

    # Shorten long descriptions before posting the commit status
    if len(description) >= 140:
        description = description[:136] + "..."

    post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)

    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        state,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        report_url,
        args.check_name,
    )

    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state == FAILURE:
        sys.exit(1)
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -108,13 +108,10 @@ def main():
|
||||
|
||||
stopwatch = Stopwatch()
|
||||
|
||||
temp_path = TEMP_PATH
|
||||
reports_path = REPORTS_PATH
|
||||
|
||||
check_name = sys.argv[1]
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
if not os.path.exists(TEMP_PATH):
|
||||
os.makedirs(TEMP_PATH)
|
||||
|
||||
pr_info = PRInfo()
|
||||
|
||||
@ -127,14 +124,14 @@ def main():
|
||||
logging.info("Check is already finished according to github status, exiting")
|
||||
sys.exit(0)
|
||||
|
||||
docker_image = get_image_with_version(reports_path, IMAGE_NAME)
|
||||
docker_image = get_image_with_version(REPORTS_PATH, IMAGE_NAME)
|
||||
|
||||
download_unit_tests(check_name, reports_path, temp_path)
|
||||
download_unit_tests(check_name, REPORTS_PATH, TEMP_PATH)
|
||||
|
||||
tests_binary_path = os.path.join(temp_path, "unit_tests_dbms")
|
||||
tests_binary_path = os.path.join(TEMP_PATH, "unit_tests_dbms")
|
||||
os.chmod(tests_binary_path, 0o777)
|
||||
|
||||
test_output = os.path.join(temp_path, "test_output")
|
||||
test_output = os.path.join(TEMP_PATH, "test_output")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
@ -151,7 +148,7 @@ def main():
|
||||
else:
|
||||
logging.info("Run failed")
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True)
|
||||
|
||||
s3_helper = S3Helper()
|
||||
state, description, test_results, additional_logs = process_results(test_output)
|
||||
|
@ -449,21 +449,31 @@ class FailureReason(enum.Enum):
|
||||
INTERNAL_ERROR = "Test internal error: "
|
||||
|
||||
|
||||
def threshold_generator(always_on_prob, always_off_prob, min_val, max_val):
    """Build a zero-argument generator of random threshold values.

    Each call of the returned function draws one number from
    ``random.random()`` and then returns:
      * ``min_val`` when the draw is <= ``always_on_prob``;
      * ``max_val`` when the draw is <= ``always_on_prob + always_off_prob``;
      * otherwise a random value between ``min_val`` and ``max_val``:
        ``random.randint`` when both bounds are ints, ``random.uniform``
        in any other case.
    """
    off_cutoff = always_on_prob + always_off_prob

    def gen():
        roll = random.random()
        if roll <= always_on_prob:
            return min_val
        if roll <= off_cutoff:
            return max_val

        both_ints = isinstance(min_val, int) and isinstance(max_val, int)
        sampler = random.randint if both_ints else random.uniform
        return sampler(min_val, max_val)

    return gen
|
||||
|
||||
|
||||
class SettingsRandomizer:
|
||||
settings = {
|
||||
"max_insert_threads": lambda: 0
|
||||
if random.random() < 0.5
|
||||
else random.randint(1, 16),
|
||||
"group_by_two_level_threshold": lambda: 1
|
||||
if random.random() < 0.1
|
||||
else 2**60
|
||||
if random.random() < 0.11
|
||||
else 100000,
|
||||
"group_by_two_level_threshold_bytes": lambda: 1
|
||||
if random.random() < 0.1
|
||||
else 2**60
|
||||
if random.random() < 0.11
|
||||
else 50000000,
|
||||
"group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000),
|
||||
"group_by_two_level_threshold_bytes": threshold_generator(
|
||||
0.2, 0.2, 1, 50000000
|
||||
),
|
||||
"distributed_aggregation_memory_efficient": lambda: random.randint(0, 1),
|
||||
"fsync_metadata": lambda: random.randint(0, 1),
|
||||
"output_format_parallel_formatting": lambda: random.randint(0, 1),
|
||||
@ -480,17 +490,15 @@ class SettingsRandomizer:
|
||||
"read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100),
|
||||
"optimize_aggregation_in_order": lambda: random.randint(0, 1),
|
||||
"aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000),
|
||||
"min_compress_block_size": lambda: random.randint(1, 1048576 * 3),
|
||||
"max_compress_block_size": lambda: random.randint(1, 1048576 * 3),
|
||||
"use_uncompressed_cache": lambda: random.randint(0, 1),
|
||||
"min_bytes_to_use_direct_io": lambda: 0
|
||||
if random.random() < 0.5
|
||||
else 1
|
||||
if random.random() < 0.2
|
||||
else random.randint(1, 1024 * 1024 * 1024),
|
||||
"min_bytes_to_use_mmap_io": lambda: 0
|
||||
if random.random() < 0.5
|
||||
else 1
|
||||
if random.random() < 0.2
|
||||
else random.randint(1, 1024 * 1024 * 1024),
|
||||
"min_bytes_to_use_direct_io": threshold_generator(
|
||||
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
|
||||
),
|
||||
"min_bytes_to_use_mmap_io": threshold_generator(
|
||||
0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
|
||||
),
|
||||
"local_filesystem_read_method": lambda: random.choice(
|
||||
["read", "pread", "mmap", "pread_threadpool", "io_uring"]
|
||||
),
|
||||
@ -514,6 +522,39 @@ class SettingsRandomizer:
|
||||
return random_settings
|
||||
|
||||
|
||||
class MergeTreeSettingsRandomizer:
    """Random value generators for a fixed set of MergeTree settings."""

    # Setting name -> zero-argument callable producing a value for it
    settings = {
        # Temporarily disabled due to a large number of failures. TODO: fix.
        # "ratio_of_defaults_for_sparse_serialization": threshold_generator(
        #     0.1, 0.6, 0.0, 1.0
        # ),
        "prefer_fetch_merged_part_size_threshold": threshold_generator(
            0.2, 0.5, 1, 10 * 1024 * 1024 * 1024
        ),
        "vertical_merge_algorithm_min_rows_to_activate": threshold_generator(
            0.4, 0.4, 1, 1000000
        ),
        "vertical_merge_algorithm_min_columns_to_activate": threshold_generator(
            0.4, 0.4, 1, 100
        ),
        "min_merge_bytes_to_use_direct_io": threshold_generator(
            0.25, 0.25, 1, 10 * 1024 * 1024 * 1024
        ),
        "index_granularity_bytes": lambda: random.randint(1024, 30 * 1024 * 1024),
        "merge_max_block_size": lambda: random.randint(1, 8192 * 3),
        "index_granularity": lambda: random.randint(1, 65536),
        "min_bytes_for_wide_part": threshold_generator(0.3, 0.3, 0, 1024 * 1024 * 1024),
    }

    @staticmethod
    def get_random_settings(args):
        """Return ``"name=value"`` strings for the randomized settings.

        Settings whose names are listed in ``args.changed_merge_tree_settings``
        are skipped; for every other entry of ``settings`` its generator is
        called once, in the order of the dictionary.
        """
        already_changed = args.changed_merge_tree_settings
        return [
            f"{name}={make_value()}"
            for name, make_value in MergeTreeSettingsRandomizer.settings.items()
            if name not in already_changed
        ]
|
||||
|
||||
|
||||
class TestResult:
|
||||
def __init__(
|
||||
self,
|
||||
@ -618,41 +659,48 @@ class TestCase:
|
||||
|
||||
return testcase_args
|
||||
|
||||
def cli_random_settings(self) -> str:
|
||||
return " ".join([f"--{setting}" for setting in self.random_settings])
|
||||
@staticmethod
|
||||
def cli_format_settings(settings_list) -> str:
|
||||
return " ".join([f"--{setting}" for setting in settings_list])
|
||||
|
||||
def add_random_settings(self, args, client_options):
|
||||
if self.tags and "no-random-settings" in self.tags:
|
||||
return client_options
|
||||
if args.no_random_settings:
|
||||
return client_options
|
||||
def has_show_create_table_in_test(self):
|
||||
return not subprocess.call(["grep", "-iq", "show create", self.case_file])
|
||||
|
||||
if len(self.base_url_params) == 0:
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
|
||||
else:
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"] = (
|
||||
self.base_url_params + "&" + "&".join(self.random_settings)
|
||||
def add_random_settings(self, client_options):
|
||||
new_options = ""
|
||||
if self.randomize_settings:
|
||||
if len(self.base_url_params) == 0:
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
|
||||
else:
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"] = (
|
||||
self.base_url_params + "&" + "&".join(self.random_settings)
|
||||
)
|
||||
|
||||
new_options += f" {self.cli_format_settings(self.random_settings)}"
|
||||
|
||||
if self.randomize_merge_tree_settings:
|
||||
new_options += f" --allow_merge_tree_settings {self.cli_format_settings(self.merge_tree_random_settings)}"
|
||||
|
||||
if new_options != "":
|
||||
new_options += " --allow_repeated_settings"
|
||||
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"] = (
|
||||
self.base_client_options + new_options + " "
|
||||
)
|
||||
|
||||
new_options = f" --allow_repeated_settings {self.cli_random_settings()}"
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"] = (
|
||||
self.base_client_options + new_options + " "
|
||||
)
|
||||
return client_options + new_options
|
||||
|
||||
def remove_random_settings_from_env(self):
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"] = self.base_url_params
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"] = self.base_client_options
|
||||
|
||||
def add_info_about_settings(self, args, description):
|
||||
if self.tags and "no-random-settings" in self.tags:
|
||||
return description
|
||||
if args.no_random_settings:
|
||||
return description
|
||||
def add_info_about_settings(self, description):
|
||||
if self.randomize_settings:
|
||||
description += f"\nSettings used in the test: {self.cli_format_settings(self.random_settings)}"
|
||||
if self.randomize_merge_tree_settings:
|
||||
description += f"\n\nMergeTree settings used in test: {self.cli_format_settings(self.merge_tree_random_settings)}"
|
||||
|
||||
return (
|
||||
f"{description}\nSettings used in the test: {self.cli_random_settings()}\n"
|
||||
)
|
||||
return description + "\n"
|
||||
|
||||
def __init__(self, suite, case: str, args, is_concurrent: bool):
|
||||
self.case: str = case # case file name
|
||||
@ -676,12 +724,40 @@ class TestCase:
|
||||
self.testcase_args = None
|
||||
self.runs_count = 0
|
||||
|
||||
self.random_settings = SettingsRandomizer.get_random_settings()
|
||||
has_no_random_settings_tag = self.tags and "no-random-settings" in self.tags
|
||||
|
||||
self.randomize_settings = not (
|
||||
args.no_random_settings or has_no_random_settings_tag
|
||||
)
|
||||
|
||||
has_no_random_merge_tree_settings_tag = (
|
||||
self.tags and "no-random-merge-tree-settings" in self.tags
|
||||
)
|
||||
|
||||
# If test contains SHOW CREATE TABLE do not
|
||||
# randomize merge tree settings, because
|
||||
# they will be added to table definition and test will fail
|
||||
self.randomize_merge_tree_settings = not (
|
||||
args.no_random_merge_tree_settings
|
||||
or has_no_random_settings_tag
|
||||
or has_no_random_merge_tree_settings_tag
|
||||
or self.has_show_create_table_in_test()
|
||||
)
|
||||
|
||||
if self.randomize_settings:
|
||||
self.random_settings = SettingsRandomizer.get_random_settings()
|
||||
|
||||
if self.randomize_merge_tree_settings:
|
||||
self.merge_tree_random_settings = (
|
||||
MergeTreeSettingsRandomizer.get_random_settings(args)
|
||||
)
|
||||
|
||||
self.base_url_params = (
|
||||
os.environ["CLICKHOUSE_URL_PARAMS"]
|
||||
if "CLICKHOUSE_URL_PARAMS" in os.environ
|
||||
else ""
|
||||
)
|
||||
|
||||
self.base_client_options = (
|
||||
os.environ["CLICKHOUSE_CLIENT_OPT"]
|
||||
if "CLICKHOUSE_CLIENT_OPT" in os.environ
|
||||
@ -1136,7 +1212,7 @@ class TestCase:
|
||||
self.testcase_args = self.configure_testcase_args(
|
||||
args, self.case_file, suite.suite_tmp_path
|
||||
)
|
||||
client_options = self.add_random_settings(args, client_options)
|
||||
client_options = self.add_random_settings(client_options)
|
||||
proc, stdout, stderr, debug_log, total_time = self.run_single_test(
|
||||
server_logs_level, client_options
|
||||
)
|
||||
@ -1149,9 +1225,7 @@ class TestCase:
|
||||
result.description = result.description.replace('\0', '')
|
||||
|
||||
if result.status == TestStatus.FAIL:
|
||||
result.description = self.add_info_about_settings(
|
||||
args, result.description
|
||||
)
|
||||
result.description = self.add_info_about_settings(result.description)
|
||||
return result
|
||||
except KeyboardInterrupt as e:
|
||||
raise e
|
||||
@ -1162,7 +1236,7 @@ class TestCase:
|
||||
FailureReason.INTERNAL_QUERY_FAIL,
|
||||
0.0,
|
||||
self.add_info_about_settings(
|
||||
args, self.get_description_from_exception_info(sys.exc_info())
|
||||
self.get_description_from_exception_info(sys.exc_info())
|
||||
),
|
||||
)
|
||||
except (ConnectionError, http.client.ImproperConnectionState):
|
||||
@ -1172,7 +1246,7 @@ class TestCase:
|
||||
FailureReason.SERVER_DIED,
|
||||
0.0,
|
||||
self.add_info_about_settings(
|
||||
args, self.get_description_from_exception_info(sys.exc_info())
|
||||
self.get_description_from_exception_info(sys.exc_info())
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
@ -1680,6 +1754,19 @@ def collect_build_flags(args):
|
||||
return result
|
||||
|
||||
|
||||
def collect_changed_merge_tree_settings(args):
    """Return the names of MergeTree settings reported as changed.

    Runs ``SELECT name FROM system.merge_tree_settings WHERE changed`` through
    ``clickhouse_execute`` and returns one decoded string per output line.
    """
    raw_output = clickhouse_execute(
        args,
        "SELECT name FROM system.merge_tree_settings WHERE changed",
    )
    # The response is decoded line by line after trimming outer whitespace
    return [line.decode() for line in raw_output.strip().splitlines()]
|
||||
|
||||
|
||||
def check_table_column(args, database, table, column):
|
||||
return (
|
||||
int(
|
||||
@ -1984,6 +2071,7 @@ def main(args):
|
||||
raise Exception(msg)
|
||||
|
||||
args.build_flags = collect_build_flags(args)
|
||||
args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args)
|
||||
args.suppport_system_processes_is_all_data_sent = check_table_column(
|
||||
args, "system", "processes", "is_all_data_sent"
|
||||
)
|
||||
@ -2328,7 +2416,12 @@ if __name__ == "__main__":
|
||||
default=False,
|
||||
help="Disable settings randomization",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--no-random-merge-tree-settings",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Disable MergeTree settings randomization",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--run-by-hash-num",
|
||||
type=int,
|
||||
|
@ -2856,7 +2856,10 @@ class ClickHouseCluster:
|
||||
SANITIZER_SIGN, from_host=True, filename="stderr.log"
|
||||
):
|
||||
sanitizer_assert_instance = instance.grep_in_log(
|
||||
SANITIZER_SIGN, from_host=True, filename="stderr.log"
|
||||
SANITIZER_SIGN,
|
||||
from_host=True,
|
||||
filename="stderr.log",
|
||||
after=1000,
|
||||
)
|
||||
logging.error(
|
||||
"Sanitizer in instance %s log %s",
|
||||
@ -2897,8 +2900,8 @@ class ClickHouseCluster:
|
||||
|
||||
if sanitizer_assert_instance is not None:
|
||||
raise Exception(
|
||||
"Sanitizer assert found in {} for instance {}".format(
|
||||
self.docker_logs_path, sanitizer_assert_instance
|
||||
"Sanitizer assert found for instance {}".format(
|
||||
sanitizer_assert_instance
|
||||
)
|
||||
)
|
||||
if fatal_log is not None:
|
||||
@ -3652,15 +3655,21 @@ class ClickHouseInstance:
|
||||
)
|
||||
return len(result) > 0
|
||||
|
||||
def grep_in_log(self, substring, from_host=False, filename="clickhouse-server.log"):
|
||||
def grep_in_log(
|
||||
self, substring, from_host=False, filename="clickhouse-server.log", after=None
|
||||
):
|
||||
logging.debug(f"grep in log called %s", substring)
|
||||
if after is not None:
|
||||
after_opt = "-A{}".format(after)
|
||||
else:
|
||||
after_opt = ""
|
||||
if from_host:
|
||||
# We check that the first file exists, but want to look through all rotated logs as well
|
||||
result = subprocess_check_call(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f'[ -f {self.logs_dir}/{filename} ] && zgrep -a "{substring}" {self.logs_dir}/{filename}* || true',
|
||||
f'[ -f {self.logs_dir}/{filename} ] && zgrep {after_opt} -a "{substring}" {self.logs_dir}/{filename}* || true',
|
||||
]
|
||||
)
|
||||
else:
|
||||
@ -3668,7 +3677,7 @@ class ClickHouseInstance:
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep -a "{substring}" /var/log/clickhouse-server/{filename}* || true',
|
||||
f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep {after_opt} -a "{substring}" /var/log/clickhouse-server/{filename}* || true',
|
||||
]
|
||||
)
|
||||
logging.debug("grep result %s", result)
|
||||
|
@ -2953,6 +2953,7 @@ def test_rabbitmq_address(rabbitmq_cluster):
|
||||
instance2.query("drop table rabbit_out sync")
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()")
|
||||
def test_format_with_prefix_and_suffix(rabbitmq_cluster):
|
||||
instance.query(
|
||||
"""
|
||||
@ -3001,6 +3002,7 @@ def test_format_with_prefix_and_suffix(rabbitmq_cluster):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()")
|
||||
def test_max_rows_per_message(rabbitmq_cluster):
|
||||
num_rows = 5
|
||||
|
||||
@ -3073,6 +3075,7 @@ def test_max_rows_per_message(rabbitmq_cluster):
|
||||
assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n"
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()")
|
||||
def test_row_based_formats(rabbitmq_cluster):
|
||||
num_rows = 10
|
||||
|
||||
@ -3169,6 +3172,7 @@ def test_row_based_formats(rabbitmq_cluster):
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()")
|
||||
def test_block_based_formats_1(rabbitmq_cluster):
|
||||
instance.query(
|
||||
"""
|
||||
@ -3230,6 +3234,7 @@ def test_block_based_formats_1(rabbitmq_cluster):
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="FIXME: flaky (something with channel.start_consuming()")
|
||||
def test_block_based_formats_2(rabbitmq_cluster):
|
||||
num_rows = 100
|
||||
|
||||
|
12
tests/performance/column_array_filter.xml
Normal file
12
tests/performance/column_array_filter.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<!-- Performance of filtering Array columns with wide integer and decimal element types -->
<test>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Int128)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(UInt128)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>

    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Int256)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(UInt256)) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>

    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal32(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal64(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal128(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
    <query>SELECT arr FROM (SELECT cast(range(number % 10) as Array(Decimal256(0))) AS arr FROM (SELECT * FROM system.numbers LIMIT 10000000) WHERE length(arr) &lt;= 5) format Null</query>
</test>
|
12
tests/performance/column_array_replicate.xml
Normal file
12
tests/performance/column_array_replicate.xml
Normal file
@ -0,0 +1,12 @@
|
||||
<test>
|
||||
<query>with cast([1,2,3,4] as Array(Int128)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
<query>with cast([1,2,3,4] as Array(UInt128)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
|
||||
<query>with cast([1,2,3,4] as Array(Int256)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
<query>with cast([1,2,3,4] as Array(UInt256)) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
|
||||
<query>with cast([1,2,3,4] as Array(Decimal32(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
<query>with cast([1,2,3,4] as Array(Decimal64(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
<query>with cast([1,2,3,4] as Array(Decimal128(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
<query>with cast([1,2,3,4] as Array(Decimal256(0))) as elem select arrayWithConstant(rand() % 10 + 5, materialize(elem)) from numbers(1000000) format Null</query>
|
||||
</test>
|
@ -44,7 +44,7 @@ select 100, max2((select count() from logs where level = 'Warning' and message_f
|
||||
group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.005);
|
||||
|
||||
-- Same as above for Error
|
||||
select 110, max2((select count() from logs where level = 'Warning' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.01);
|
||||
select 110, max2((select count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.01);
|
||||
|
||||
-- Avoid too noisy messages: limit the number of messages with high frequency
|
||||
select 120, max2(count(), 3) from (select count() / (select count() from logs) as freq, message_format_string from logs group by message_format_string having freq > 0.10);
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-random-merge-tree-settings
|
||||
|
||||
set -e
|
||||
|
||||
@ -7,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90"
|
||||
$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes"
|
||||
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes"
|
||||
@ -18,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes"
|
||||
# PREWHERE using empty column
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000"
|
||||
$CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0"
|
||||
$CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0"
|
||||
@ -29,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs"
|
||||
# Nullable PREWHERE
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001"
|
||||
$CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)"
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere"
|
||||
|
@ -25,15 +25,9 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
1
|
||||
0
|
||||
-
|
||||
1
|
||||
1
|
||||
1
|
||||
-
|
||||
(1,2) ((1,2),(3,4)) 1 1
|
||||
-
|
||||
|
@ -28,16 +28,10 @@ select 1 in (0 + 1, 1, toInt8(sin(5)));
|
||||
select (0 + 1, 1, toInt8(sin(5))) in (0 + 1, 1, toInt8(sin(5)));
|
||||
select identity(tuple(1)) in (tuple(1), tuple(2));
|
||||
select identity(tuple(1)) in (tuple(0), tuple(2));
|
||||
select identity(tuple(1)) in (identity(tuple(1)), tuple(2));
|
||||
select identity(tuple(1)) in (identity(tuple(0)), tuple(2));
|
||||
select identity(tuple(1)) in (identity(tuple(1)), identity(tuple(2)));
|
||||
select identity(tuple(1)) in (identity(tuple(1)), identity(identity(tuple(2))));
|
||||
select identity(tuple(1)) in (identity(tuple(0)), identity(identity(tuple(2))));
|
||||
|
||||
select '-';
|
||||
select identity((1, 2)) in (1, 2);
|
||||
select identity((1, 2)) in ((1, 2), (3, 4));
|
||||
select identity((1, 2)) in ((1, 2), identity((3, 4)));
|
||||
|
||||
select '-';
|
||||
select (1,2) as x, ((1,2),(3,4)) as y, 1 in x, x in y;
|
||||
@ -50,4 +44,3 @@ select (1, 2) in (select (1, 2));
|
||||
select identity(tuple(1)) in (select tuple(1));
|
||||
select identity((1, 2)) in (select 1, 2);
|
||||
select identity((1, 2)) in (select (1, 2));
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-random-merge-tree-settings
|
||||
|
||||
#--------------------------------------------
|
||||
# Description of test result:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user