Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-22 15:42:02 +00:00)

Commit 75f49a48e1: Merge branch 'master' into fix_trash
.github/workflows/master.yml (vendored, 64 changed lines)
@@ -215,8 +215,8 @@ jobs:
           fetch-depth: 0 # For a proper version and performance artifacts
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
-          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
+          git -C "$GITHUB_WORKSPACE" submodule sync
+          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"

The same two-line substitution (the recursive submodule sync/update replaced by a plain, non-recursive sync/update) is repeated in the Build step of every other job in this workflow. The remaining hunks differ only in their leading context line:

@@ -259,8 +259,8 @@ jobs:  (context: fetch-depth: 0 # For a proper version and performance artifacts)
@@ -305,8 +305,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
@@ -350,8 +350,8 @@ jobs:  (a commented-out Build step, "# uses: actions/checkout@v2"; the same git lines change inside the comments)
@@ -395,8 +395,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -440,8 +440,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -485,8 +485,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -530,8 +530,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -575,8 +575,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -623,8 +623,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -668,8 +668,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -715,8 +715,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
@@ -762,8 +762,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
@@ -809,8 +809,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
@@ -856,8 +856,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
@@ -903,8 +903,8 @@ jobs:  (context: fetch-depth: 0 # otherwise we will have no info about contributors)
.github/workflows/pull_request.yml (vendored, 64 changed lines)
@@ -277,8 +277,8 @@ jobs:
           fetch-depth: 0 # for performance artifact
       - name: Build
         run: |
-          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
-          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
+          git -C "$GITHUB_WORKSPACE" submodule sync
+          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --init --jobs=10
           sudo rm -fr "$TEMP_PATH"
           mkdir -p "$TEMP_PATH"
           cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"

The identical substitution (non-recursive submodule sync/update) is applied to the Build step of every other job in this workflow as well. The remaining hunks differ only in their leading context line:

@@ -322,8 +322,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -367,8 +367,8 @@ jobs:  (a commented-out Build step, "# uses: actions/checkout@v2"; the same git lines change inside the comments)
@@ -414,8 +414,8 @@ jobs:  (context: fetch-depth: 0 # for performance artifact)
@@ -459,8 +459,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -504,8 +504,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -549,8 +549,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -594,8 +594,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -639,8 +639,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -687,8 +687,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -732,8 +732,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -777,8 +777,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -822,8 +822,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -867,8 +867,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -912,8 +912,8 @@ jobs:  (context: uses: actions/checkout@v2)
@@ -957,8 +957,8 @@ jobs:  (context: uses: actions/checkout@v2)
.gitmodules (vendored, 6 changed lines)
@@ -79,10 +79,10 @@
 	url = https://github.com/ClickHouse/snappy.git
 [submodule "contrib/cppkafka"]
 	path = contrib/cppkafka
-	url = https://github.com/mfontanini/cppkafka.git
+	url = https://github.com/ClickHouse/cppkafka.git
 [submodule "contrib/brotli"]
 	path = contrib/brotli
-	url = https://github.com/google/brotli.git
+	url = https://github.com/ClickHouse/brotli.git
 [submodule "contrib/h3"]
 	path = contrib/h3
 	url = https://github.com/ClickHouse/h3
@@ -144,7 +144,7 @@
 	ignore = untracked
 [submodule "contrib/msgpack-c"]
 	path = contrib/msgpack-c
-	url = https://github.com/msgpack/msgpack-c
+	url = https://github.com/ClickHouse/msgpack-c
 [submodule "contrib/libcpuid"]
 	path = contrib/libcpuid
 	url = https://github.com/ClickHouse/libcpuid.git
@@ -5,6 +5,11 @@ if (NOT ENABLE_AMQPCPP)
     return()
 endif()

+if (NOT TARGET ch_contrib::uv)
+    message(STATUS "Not using AMQP-CPP because libuv is disabled")
+    return()
+endif()
+
 set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP")

 set (SRCS
contrib/arrow (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit efdcd015cfdee1b6aa349c9ca227ca12c3d697f5
+Subproject commit 6f274b737c66a6c39bab0d3bdf6cf7d139ef06f5
contrib/brotli (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2
+Subproject commit 5bd78768449751a78d4b4c646b0612917986f5b1
@@ -5,6 +5,11 @@ if (NOT ENABLE_CASSANDRA)
     return()
 endif()

+if (NOT TARGET ch_contrib::uv)
+    message(STATUS "Not using cassandra because libuv is disabled")
+    return()
+endif()
+
 if (APPLE)
     set(CMAKE_MACOSX_RPATH ON)
 endif()
contrib/cppkafka (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit 5a119f689f8a4d90d10a9635e7ee2bee5c127de1
+Subproject commit 64bd67db12b9c705e9127439a5b05b351d9df7da
contrib/msgpack-c (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit 46684265d50b5d1b062d4c5c428ba08462844b1d
+Subproject commit 790b3fe58ebded7a8bd130782ef28bec5784c248
contrib/rapidjson (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa
+Subproject commit b571bd5c1a3b1fc931d77ae36932537a3c9018c3
contrib/snappy (vendored, 2 changed lines)

@@ -1 +1 @@
-Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf
+Subproject commit 3786173af204d21da97180977ad6ab4321138b3d
@@ -355,22 +355,8 @@ fi
 cat > report.html <<EOF ||:
 <!DOCTYPE html>
 <html lang="en">
-<link rel="preload" as="font" href="https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2" type="font/woff2" crossorigin="anonymous"/>
 <style>
-@font-face {
-    font-family:'Yandex Sans Display Web';
-    src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
-    src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
-        url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
-        url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
-        url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
-        url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
-    font-weight:400;
-    font-style:normal;
-    font-stretch:normal
-}
-
-body { font-family: "Yandex Sans Display Web", Arial, sans-serif; background: #EEE; }
+body { font-family: "DejaVu Sans", "Noto Sans", Arial, sans-serif; background: #EEE; }
 h1 { margin-left: 10px; }
 th, td { border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF;
 td { white-space: pre; font-family: Monospace, Courier New; }
@@ -378,7 +364,6 @@ border: 0; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0,
 a { color: #06F; text-decoration: none; }
 a:hover, a:active { color: #F40; text-decoration: underline; }
 table { border: 0; }
 .main { margin-left: 10%; }
 p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }
 th { cursor: pointer; }

@@ -92,8 +92,6 @@ function run_tests()

 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
     ADDITIONAL_OPTIONS+=('--replicated-database')
     # Cannot be used with replicated database, due to distributed_ddl_output_mode=none
     ADDITIONAL_OPTIONS+=('--no-left-queries-check')
     ADDITIONAL_OPTIONS+=('--jobs')
     ADDITIONAL_OPTIONS+=('2')
 else
@@ -5,20 +5,9 @@ description: Dataset containing the on-time performance of airline flights

 # OnTime

-This dataset can be obtained in two ways:
+This dataset contains data from Bureau of Transportation Statistics.

-- import from raw data
-- download of prepared partitions
-
-## Import from Raw Data {#import-from-raw-data}
-
-Downloading data:
-
-``` bash
-wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip
-```
-
-Creating a table:
+## Creating a table

 ``` sql
 CREATE TABLE `ontime`
@@ -29,140 +18,138 @@ CREATE TABLE `ontime`
     `DayofMonth` UInt8,
     `DayOfWeek` UInt8,
     `FlightDate` Date,
-    `Reporting_Airline` String,
+    `Reporting_Airline` LowCardinality(String),
     `DOT_ID_Reporting_Airline` Int32,
-    `IATA_CODE_Reporting_Airline` String,
-    `Tail_Number` String,
-    `Flight_Number_Reporting_Airline` String,
+    `IATA_CODE_Reporting_Airline` LowCardinality(String),
+    `Tail_Number` LowCardinality(String),
+    `Flight_Number_Reporting_Airline` LowCardinality(String),
     `OriginAirportID` Int32,
     `OriginAirportSeqID` Int32,
     `OriginCityMarketID` Int32,
     `Origin` FixedString(5),
-    `OriginCityName` String,
+    `OriginCityName` LowCardinality(String),
     `OriginState` FixedString(2),
-    `OriginStateFips` String,
-    `OriginStateName` String,
+    `OriginStateFips` FixedString(2),
+    `OriginStateName` LowCardinality(String),
     `OriginWac` Int32,
     `DestAirportID` Int32,
     `DestAirportSeqID` Int32,
     `DestCityMarketID` Int32,
     `Dest` FixedString(5),
-    `DestCityName` String,
+    `DestCityName` LowCardinality(String),
     `DestState` FixedString(2),
-    `DestStateFips` String,
-    `DestStateName` String,
+    `DestStateFips` FixedString(2),
+    `DestStateName` LowCardinality(String),
     `DestWac` Int32,
     `CRSDepTime` Int32,
     `DepTime` Int32,
     `DepDelay` Int32,
     `DepDelayMinutes` Int32,
     `DepDel15` Int32,
-    `DepartureDelayGroups` String,
-    `DepTimeBlk` String,
+    `DepartureDelayGroups` LowCardinality(String),
+    `DepTimeBlk` LowCardinality(String),
     `TaxiOut` Int32,
-    `WheelsOff` Int32,
-    `WheelsOn` Int32,
+    `WheelsOff` LowCardinality(String),
+    `WheelsOn` LowCardinality(String),
     `TaxiIn` Int32,
     `CRSArrTime` Int32,
     `ArrTime` Int32,
     `ArrDelay` Int32,
     `ArrDelayMinutes` Int32,
     `ArrDel15` Int32,
-    `ArrivalDelayGroups` Int32,
-    `ArrTimeBlk` String,
-    `Cancelled` UInt8,
+    `ArrivalDelayGroups` LowCardinality(String),
+    `ArrTimeBlk` LowCardinality(String),
+    `Cancelled` Int8,
     `CancellationCode` FixedString(1),
-    `Diverted` UInt8,
+    `Diverted` Int8,
     `CRSElapsedTime` Int32,
     `ActualElapsedTime` Int32,
-    `AirTime` Nullable(Int32),
+    `AirTime` Int32,
     `Flights` Int32,
     `Distance` Int32,
-    `DistanceGroup` UInt8,
+    `DistanceGroup` Int8,
     `CarrierDelay` Int32,
     `WeatherDelay` Int32,
     `NASDelay` Int32,
     `SecurityDelay` Int32,
     `LateAircraftDelay` Int32,
-    `FirstDepTime` String,
-    `TotalAddGTime` String,
-    `LongestAddGTime` String,
-    `DivAirportLandings` String,
-    `DivReachedDest` String,
-    `DivActualElapsedTime` String,
-    `DivArrDelay` String,
-    `DivDistance` String,
-    `Div1Airport` String,
+    `FirstDepTime` Int16,
+    `TotalAddGTime` Int16,
+    `LongestAddGTime` Int16,
+    `DivAirportLandings` Int8,
+    `DivReachedDest` Int8,
+    `DivActualElapsedTime` Int16,
+    `DivArrDelay` Int16,
+    `DivDistance` Int16,
+    `Div1Airport` LowCardinality(String),
     `Div1AirportID` Int32,
     `Div1AirportSeqID` Int32,
-    `Div1WheelsOn` String,
-    `Div1TotalGTime` String,
-    `Div1LongestGTime` String,
-    `Div1WheelsOff` String,
-    `Div1TailNum` String,
-    `Div2Airport` String,
+    `Div1WheelsOn` Int16,
+    `Div1TotalGTime` Int16,
+    `Div1LongestGTime` Int16,
+    `Div1WheelsOff` Int16,
+    `Div1TailNum` LowCardinality(String),
+    `Div2Airport` LowCardinality(String),
     `Div2AirportID` Int32,
     `Div2AirportSeqID` Int32,
-    `Div2WheelsOn` String,
-    `Div2TotalGTime` String,
-    `Div2LongestGTime` String,
-    `Div2WheelsOff` String,
-    `Div2TailNum` String,
-    `Div3Airport` String,
+    `Div2WheelsOn` Int16,
+    `Div2TotalGTime` Int16,
+    `Div2LongestGTime` Int16,
+    `Div2WheelsOff` Int16,
+    `Div2TailNum` LowCardinality(String),
+    `Div3Airport` LowCardinality(String),
     `Div3AirportID` Int32,
     `Div3AirportSeqID` Int32,
-    `Div3WheelsOn` String,
-    `Div3TotalGTime` String,
-    `Div3LongestGTime` String,
-    `Div3WheelsOff` String,
-    `Div3TailNum` String,
-    `Div4Airport` String,
+    `Div3WheelsOn` Int16,
+    `Div3TotalGTime` Int16,
+    `Div3LongestGTime` Int16,
+    `Div3WheelsOff` Int16,
+    `Div3TailNum` LowCardinality(String),
+    `Div4Airport` LowCardinality(String),
     `Div4AirportID` Int32,
     `Div4AirportSeqID` Int32,
-    `Div4WheelsOn` String,
-    `Div4TotalGTime` String,
-    `Div4LongestGTime` String,
-    `Div4WheelsOff` String,
-    `Div4TailNum` String,
-    `Div5Airport` String,
+    `Div4WheelsOn` Int16,
+    `Div4TotalGTime` Int16,
+    `Div4LongestGTime` Int16,
+    `Div4WheelsOff` Int16,
+    `Div4TailNum` LowCardinality(String),
+    `Div5Airport` LowCardinality(String),
     `Div5AirportID` Int32,
     `Div5AirportSeqID` Int32,
-    `Div5WheelsOn` String,
-    `Div5TotalGTime` String,
-    `Div5LongestGTime` String,
-    `Div5WheelsOff` String,
-    `Div5TailNum` String
+    `Div5WheelsOn` Int16,
+    `Div5TotalGTime` Int16,
+    `Div5LongestGTime` Int16,
+    `Div5WheelsOff` Int16,
+    `Div5TailNum` LowCardinality(String)
 ) ENGINE = MergeTree
-PARTITION BY Year
-ORDER BY (IATA_CODE_Reporting_Airline, FlightDate)
-SETTINGS index_granularity = 8192;
+ORDER BY (Year, Quarter, Month, DayofMonth, FlightDate, IATA_CODE_Reporting_Airline);
 ```

+## Import from Raw Data {#import-from-raw-data}
+
+Downloading data:
+
+``` bash
+wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2022}_{1..12}.zip
+```
+
 Loading data with multiple threads:

 ``` bash
-ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
+ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_csv_empty_as_default 1 --query='INSERT INTO ontime FORMAT CSVWithNames'"
 ```

 (if you will have memory shortage or other issues on your server, remove the `-P $(nproc)` part)

-## Download of Prepared Partitions {#download-of-prepared-partitions}
+## Import from a saved copy

-``` bash
-$ curl -O https://datasets.clickhouse.com/ontime/partitions/ontime.tar
-$ tar xvf ontime.tar -C /var/lib/clickhouse # path to ClickHouse data directory
-$ # check permissions of unpacked data, fix if required
-$ sudo service clickhouse-server restart
-$ clickhouse-client --query "select count(*) from datasets.ontime"
+Alternatively, you can import data from a saved copy by the following query:

 ```
+INSERT INTO ontime SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/ontime/csv_by_year/*.csv.gz', CSVWithNames) SETTINGS max_insert_threads = 40;
 ```

-:::note
-If you will run the queries described below, you have to use the full table name, `datasets.ontime`.
-:::
-
-!!! info "Info"
-    If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` in the following queries with `Carrier` (see `describe ontime`).
+The snapshot was created on 2022-05-29.

 ## Queries {#queries}
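As a quick sanity check after loading the dataset with either method, a simple count per year can confirm the import worked (an illustrative query, not part of this commit):

``` sql
-- Rows per year; cheap to run and easy to eyeball against the source files
SELECT Year, count()
FROM ontime
GROUP BY Year
ORDER BY Year;
```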
@@ -108,10 +108,8 @@ Converting “star schema” to denormalized “flat schema”:
 SET max_memory_usage = 20000000000;

 CREATE TABLE lineorder_flat
-ENGINE = MergeTree
-PARTITION BY toYear(LO_ORDERDATE)
-ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS
-SELECT
+ENGINE = MergeTree ORDER BY (LO_ORDERDATE, LO_ORDERKEY)
+AS SELECT
     l.LO_ORDERKEY AS LO_ORDERKEY,
     l.LO_LINENUMBER AS LO_LINENUMBER,
     l.LO_CUSTKEY AS LO_CUSTKEY,
@@ -149,6 +149,8 @@ Each element of [Nested](../sql-reference/data-types/nested-data-structures/nest
 In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id.
 If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) to optimize ENUM parsing.

+While importing data, you can skip some first rows using setting [input_format_tsv_skip_first_lines](../operations/settings/settings.md#settings-input_format_tsv_skip_first_lines)
+
 For example:

 ``` sql
@@ -429,6 +431,8 @@ If input data contains only ENUM ids, it's recommended to enable the setting [in

 The CSV format supports the output of totals and extremes the same way as `TabSeparated`.

+While importing data, you can skip some first rows using setting [input_format_csv_skip_first_lines](../operations/settings/settings.md#settings-input_format_csv_skip_first_lines)
+
 ## CSVWithNames {#csvwithnames}

 Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
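For readers skimming the diff, a minimal sketch of the `input_format_tsv_enum_as_number` behaviour mentioned in the hunk above (the table, column names, and data are hypothetical, not part of this commit):

``` sql
SET input_format_tsv_enum_as_number = 1;

CREATE TABLE enum_test (id UInt32, event Enum('click' = 1, 'view' = 2)) ENGINE = Memory;

-- The incoming TSV row carries the numeric id 2 instead of the name 'view'
INSERT INTO enum_test FORMAT TSV 1	2
```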
docs/en/interfaces/third-party/gui.md (vendored, 10 changed lines)

@@ -147,6 +147,16 @@ Features:

 [Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries.

+### ClickCat {#clickcat}
+
+[ClickCat](https://github.com/open-botech/ClickCat) is a friendly user interface that lets you search, explore and visualize your ClickHouse data.
+
+Features:
+
+- An online SQL editor which can run your SQL code without any installing.
+- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui.
+- The Metrics contains Cluster Analysis, Data Analysis, Query Analysis.
+
 ## Commercial {#commercial}

 ### DataGrip {#datagrip}
@@ -5,7 +5,7 @@ sidebar_label: Caches

 # Cache Types {#cache-types}

-When performing queries, ClichHouse uses different caches.
+When performing queries, ClickHouse uses different caches.

 Main cache types:

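As an aside for readers of this hunk, the cache types that page goes on to list can be dropped at runtime; two illustrative statements (not part of this commit):

``` sql
-- Free the mark cache and the uncompressed block cache
SYSTEM DROP MARK CACHE;
SYSTEM DROP UNCOMPRESSED CACHE;
```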
@@ -521,6 +521,18 @@ Result:
 └─────┴────────┘
 ```

+## input_format_tsv_skip_first_lines {#settings-input_format_tsv_skip_first_lines}
+
+The number of lines to skip at the beginning of data in TSV input format.
+
+Default value: `0`.
+
+## input_format_csv_skip_first_lines {#settings-input_format_csv_skip_first_lines}
+
+The number of lines to skip at the beginning of data in CSV input format.
+
+Default value: `0`.
+
 ## input_format_null_as_default {#settings-input-format-null-as-default}

 Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) fields with [default values](../../sql-reference/statements/create/table.md#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
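To make the two newly documented settings concrete, a minimal illustrative query (not part of this commit; `data.csv` is a hypothetical file with two comment lines before the real header):

``` sql
SELECT *
FROM file('data.csv', CSVWithNames)
SETTINGS input_format_csv_skip_first_lines = 2;
```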
@@ -21,7 +21,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
 - The dictionary being accessed could not be loaded.
 - Error querying a `cached` dictionary.

-You can view the list of external dictionaries and their statuses in the `system.dictionaries` table.
+You can view the list of external dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.

 The configuration looks like this:

@@ -48,6 +48,35 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
 ...
 ```

+Dictionaries without word `complex-key*` in a layout have a key with [UInt64](../../../sql-reference/data-types/int-uint.md) type, `complex-key*` dictionaries have a composite key (complex, with arbitrary types).
+
+[UInt64](../../../sql-reference/data-types/int-uint.md) keys in XML dictionaries are defined with `<id>` tag.
+
+Configuration example (column key_column has UInt64 type):
+```xml
+...
+<structure>
+    <id>
+        <name>key_column</name>
+    </id>
+...
+```
+
+Composite `complex` keys XML dictionaries are defined `<key>` tag.
+
+Configuration example of a composite key (key has one element with [String](../../../sql-reference/data-types/string.md) type):
+```xml
+...
+<structure>
+    <key>
+        <attribute>
+            <name>country_code</name>
+            <type>String</type>
+        </attribute>
+    </key>
+...
+```
+
 ## Ways to Store Dictionaries in Memory {#ways-to-store-dictionaries-in-memory}

 - [flat](#flat)
@@ -98,6 +127,8 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))

 The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 If `preallocate` is `true` (default is `false`) the hash table will be preallocated (this will make the dictionary load faster). But note that you should use it only if:

 - The source support an approximate number of elements (for now it is supported only by the `ClickHouse` source).
@@ -125,6 +156,8 @@ LAYOUT(HASHED(PREALLOCATE 0))

 Similar to `hashed`, but uses less memory in favor more CPU usage.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 It will be also preallocated so as `hashed` (with `preallocate` set to `true`), and note that it is even more significant for `sparse_hashed`.

 Configuration example:
@@ -181,6 +214,8 @@ LAYOUT(COMPLEX_KEY_SPARSE_HASHED())

 The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hashed table where value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.

 Configuration example:
@@ -220,6 +255,7 @@ LAYOUT(COMPLEX_KEY_HASHED_ARRAY())

 The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
 This storage method works the same way as hashed and allows using date/time (arbitrary numeric type) ranges in addition to the key.

 Example: The table contains discounts for each advertiser in the format:
@@ -360,6 +396,8 @@ RANGE(MIN StartDate MAX EndDate);

 The dictionary is stored in a cache that has a fixed number of cells. These cells contain frequently used elements.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.

 If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
@@ -420,6 +458,8 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic

 Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionary settings related to update queue can also be applied to SSD cache dictionaries.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 ``` xml
 <layout>
     <ssd_cache>
@@ -452,7 +492,7 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic

 The dictionary is not stored in memory and directly goes to the source during the processing of a request.

-The dictionary key has the `UInt64` type.
+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.

 All types of [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), except local files, are supported.
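The XML key definitions added earlier in this file's diff have a DDL counterpart; as a hedged illustration only (the dictionary, table, and attribute names below are made up, not part of this commit), a composite String key can also be declared like this:

``` sql
CREATE DICTIONARY country_names
(
    country_code String,
    name String
)
PRIMARY KEY country_code                      -- plays the role of the <key> element
SOURCE(CLICKHOUSE(TABLE 'countries'))
LAYOUT(COMPLEX_KEY_HASHED())                  -- complex-key* layouts accept non-UInt64 keys
LIFETIME(MIN 300 MAX 600);
```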
The same change is applied to the Russian documentation page (translated here to English; XML and identifiers kept as in the source):

@@ -21,7 +21,7 @@ sidebar_label: "Хранение словарей в памяти"
 - When accessing a dictionary that could not be loaded.
 - When there is an error querying a `cached` dictionary.

-The list of external dictionaries and their statuses can be viewed in the `system.dictionaries` table.
+The list of external dictionaries and their statuses can be viewed in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.

 The general form of the configuration:

@@ -48,6 +48,36 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
 ...
 ```

+Keys of dictionaries whose layout name does not contain the word `complex-key*` have the [UInt64](../../../sql-reference/data-types/int-uint.md) type; `complex-key*` dictionaries allow an arbitrary key type (composite, with different types).
+
+[UInt64](../../../sql-reference/data-types/int-uint.md) keys in XML dictionaries are defined with the `<id>` tag.
+
+Configuration example (the key_column field has the UInt64 type):
+```xml
+...
+<structure>
+    <id>
+        <name>key_column</name>
+    </id>
+...
+```
+
+Composite `complex` keys in XML dictionaries are defined with the `<key>` tag.
+
+Configuration example of a composite key (the key consists of one element with the [String](../../../sql-reference/data-types/string.md) type):
+```xml
+...
+<structure>
+    <key>
+        <attribute>
+            <name>country_code</name>
+            <type>String</type>
+        </attribute>
+    </key>
+...
+```
+
 ## Ways to Store Dictionaries in Memory {#ways-to-store-dictionaries-in-memory}

 - [flat](#flat)
@@ -98,6 +128,8 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))

 The dictionary is completely stored in memory in the form of hash tables. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 If `preallocate` is `true` (default is `false`), the hash table will be preallocated (this will make the dictionary load faster). Use this method only if:

 - The source supports an arbitrary number of elements (for now this is supported only by the `ClickHouse` source).
@@ -125,6 +157,8 @@ LAYOUT(HASHED(PREALLOCATE 0))

 Similar to `hashed`, but uses less memory in exchange for higher CPU usage.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 For this layout type `preallocate` can also be set to `true`; in this case it is even more important than for `hashed`.

 Configuration example:
@@ -181,6 +215,8 @@ LAYOUT(COMPLEX_KEY_SPARSE_HASHED())

 The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored as a hash table whose value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.

 Configuration example:
@@ -220,6 +256,7 @@ LAYOUT(COMPLEX_KEY_HASHED_ARRAY())

 The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
 This storage method works the same way as hashed and additionally allows using date/time (arbitrary numeric type) ranges in addition to the key.

 Example: the table contains discounts for each advertiser in the format:
@@ -355,6 +392,8 @@ RANGE(MIN StartDate MAX EndDate);

 The dictionary is stored in a cache consisting of a fixed number of cells. These cells contain frequently used elements.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 When searching the dictionary, the cache is checked first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.

 If keys are not found in the dictionary, an update-cache task is created and added to the update queue. Update queue parameters can be set with the settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`
@@ -414,6 +453,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))

 Similar to `cache`, but stores data on SSD and the index in RAM. All parameters related to the update queue can also be applied to SSD cache dictionaries.

+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
+
 ``` xml
 <layout>
     <ssd_cache>
@@ -446,7 +487,7 @@ LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576

 The dictionary does not store data locally and interacts with the source directly at query time.

-The dictionary key has the `UInt64` type.
+The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.

 All types of [sources](external-dicts-dict-sources.md) are supported, except local files.
@@ -546,8 +546,9 @@ static void sanityChecks(Server & server)
 #if defined(OS_LINUX)
     try
     {
-        if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
-            server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded.");
+        const char * filename = "/sys/devices/system/clocksource/clocksource0/current_clocksource";
+        if (readString(filename).find("tsc") == std::string::npos)
+            server.context()->addWarningMessage("Linux is not using a fast TSC clock source. Performance can be degraded. Check " + String(filename));
     }
     catch (...)
     {
@@ -555,8 +556,9 @@ static void sanityChecks(Server & server)

     try
     {
-        if (readNumber("/proc/sys/vm/overcommit_memory") == 2)
-            server.context()->addWarningMessage("Linux memory overcommit is disabled.");
+        const char * filename = "/proc/sys/vm/overcommit_memory";
+        if (readNumber(filename) == 2)
+            server.context()->addWarningMessage("Linux memory overcommit is disabled. Check " + String(filename));
     }
     catch (...)
     {
@@ -564,8 +566,9 @@ static void sanityChecks(Server & server)

     try
     {
-        if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
-            server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\".");
+        const char * filename = "/sys/kernel/mm/transparent_hugepage/enabled";
+        if (readString(filename).find("[always]") != std::string::npos)
+            server.context()->addWarningMessage("Linux transparent hugepages are set to \"always\". Check " + String(filename));
     }
     catch (...)
     {
@@ -573,8 +576,9 @@ static void sanityChecks(Server & server)

     try
     {
-        if (readNumber("/proc/sys/kernel/pid_max") < 30000)
-            server.context()->addWarningMessage("Linux max PID is too low.");
+        const char * filename = "/proc/sys/kernel/pid_max";
+        if (readNumber(filename) < 30000)
+            server.context()->addWarningMessage("Linux max PID is too low. Check " + String(filename));
     }
     catch (...)
     {
@@ -582,8 +586,9 @@ static void sanityChecks(Server & server)

     try
     {
-        if (readNumber("/proc/sys/kernel/threads-max") < 30000)
-            server.context()->addWarningMessage("Linux threads max count is too low.");
+        const char * filename = "/proc/sys/kernel/threads-max";
+        if (readNumber(filename) < 30000)
+            server.context()->addWarningMessage("Linux threads max count is too low. Check " + String(filename));
     }
     catch (...)
     {
@@ -591,7 +596,7 @@ static void sanityChecks(Server & server)

     std::string dev_id = getBlockDeviceId(data_path);
     if (getBlockDeviceType(dev_id) == BlockDeviceType::ROT && getBlockDeviceReadAheadBytes(dev_id) == 0)
-        server.context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded.");
+        server.context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded. Used for data: " + String(data_path));
 #endif

     try
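Warnings registered through addWarningMessage are exposed in the system.warnings table, so the new file-path hints can be inspected from SQL once the server is up (illustrative query, not part of this commit):

``` sql
-- Show startup sanity-check warnings, including the "Check <file>" suffixes added above
SELECT message FROM system.warnings;
```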
@@ -224,8 +224,16 @@ public:
         ++this->data(place).denominator;
     }

-    void
-    addBatchSinglePlace(
+    void addManyDefaults(
+        AggregateDataPtr __restrict place,
+        const IColumn ** /*columns*/,
+        size_t length,
+        Arena * /*arena*/) const override
+    {
+        this->data(place).denominator += length;
+    }
+
+    void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr place,
@@ -53,6 +53,15 @@ public:
         ++data(place).count;
     }

+    void addManyDefaults(
+        AggregateDataPtr __restrict place,
+        const IColumn ** /*columns*/,
+        size_t length,
+        Arena * /*arena*/) const override
+    {
+        data(place).count += length;
+    }
+
     void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
@@ -880,8 +880,9 @@ struct AggregateFunctionMinData : Data
 {
     using Self = AggregateFunctionMinData;

-    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeIfLess(column, row_num, arena); }
-    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfLess(to, arena); }
+    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeIfLess(column, row_num, arena); }
+    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfLess(to, arena); }
+    void addManyDefaults(const IColumn & column, size_t /*length*/, Arena * arena) { this->changeIfLess(column, 0, arena); }

     static const char * name() { return "min"; }

@@ -907,8 +908,9 @@ struct AggregateFunctionMaxData : Data
 {
     using Self = AggregateFunctionMaxData;

-    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeIfGreater(column, row_num, arena); }
-    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfGreater(to, arena); }
+    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeIfGreater(column, row_num, arena); }
+    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeIfGreater(to, arena); }
+    void addManyDefaults(const IColumn & column, size_t /*length*/, Arena * arena) { this->changeIfGreater(column, 0, arena); }

     static const char * name() { return "max"; }

@@ -935,8 +937,9 @@ struct AggregateFunctionAnyData : Data
     using Self = AggregateFunctionAnyData;
     static constexpr bool is_any = true;

-    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeFirstTime(column, row_num, arena); }
-    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeFirstTime(to, arena); }
+    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeFirstTime(column, row_num, arena); }
+    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeFirstTime(to, arena); }
+    void addManyDefaults(const IColumn & column, size_t /*length*/, Arena * arena) { this->changeFirstTime(column, 0, arena); }

     static const char * name() { return "any"; }

@@ -962,8 +965,9 @@ struct AggregateFunctionAnyLastData : Data
 {
     using Self = AggregateFunctionAnyLastData;

-    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeEveryTime(column, row_num, arena); }
-    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeEveryTime(to, arena); }
+    bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) { return this->changeEveryTime(column, row_num, arena); }
+    bool changeIfBetter(const Self & to, Arena * arena) { return this->changeEveryTime(to, arena); }
+    void addManyDefaults(const IColumn & column, size_t /*length*/, Arena * arena) { this->changeEveryTime(column, 0, arena); }

     static const char * name() { return "anyLast"; }

@@ -1024,6 +1028,8 @@ struct AggregateFunctionSingleValueOrNullData : Data
         return false;
     }

+    void addManyDefaults(const IColumn & column, size_t /*length*/, Arena * arena) { this->changeIfBetter(column, 0, arena); }
+
     void insertResultInto(IColumn & to) const
     {
         if (is_null || first_value)
@@ -1098,6 +1104,12 @@ struct AggregateFunctionAnyHeavyData : Data
         return false;
     }

+    void addManyDefaults(const IColumn & column, size_t length, Arena * arena)
+    {
+        for (size_t i = 0; i < length; ++i)
+            changeIfBetter(column, 0, arena);
+    }
+
     void write(WriteBuffer & buf, const ISerialization & serialization) const
     {
         Data::write(buf, serialization);
@@ -1158,6 +1170,15 @@ public:
         this->data(place).changeIfBetter(*columns[0], row_num, arena);
     }

+    void addManyDefaults(
+        AggregateDataPtr __restrict place,
+        const IColumn ** columns,
+        size_t length,
+        Arena * arena) const override
+    {
+        this->data(place).addManyDefaults(*columns[0], length, arena);
+    }
+
     void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
@@ -489,6 +489,33 @@ public:
         }
     }

+    void addManyDefaults(
+        AggregateDataPtr __restrict /*place*/,
+        const IColumn ** /*columns*/,
+        size_t /*length*/,
+        Arena * /*arena*/) const override
+    {
+    }
+
+    void addBatchSparse(
+        size_t row_begin,
+        size_t row_end,
+        AggregateDataPtr * places,
+        size_t place_offset,
+        const IColumn ** columns,
+        Arena * arena) const override
+    {
+        const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
+        const auto * values = &column_sparse.getValuesColumn();
+        const auto & offsets = column_sparse.getOffsetsData();
+
+        size_t from = std::lower_bound(offsets.begin(), offsets.end(), row_begin) - offsets.begin();
+        size_t to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin();
+
+        for (size_t i = from; i < to; ++i)
+            add(places[offsets[i]] + place_offset, &values, i + 1, arena);
+    }
+
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).merge(this->data(rhs));
@@ -237,6 +237,15 @@ public:
         detail::OneAdder<T, Data>::add(this->data(place), *columns[0], row_num);
     }

+    void addManyDefaults(
+        AggregateDataPtr __restrict place,
+        const IColumn ** columns,
+        size_t /*length*/,
+        Arena * /*arena*/) const override
+    {
+        detail::OneAdder<T, Data>::add(this->data(place), *columns[0], 0);
+    }
+
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
         this->data(place).set.merge(this->data(rhs).set);
@ -123,6 +123,10 @@ public:
*/
virtual void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const = 0;

/// Adds several default values of arguments into the aggregation data that `place` points to.
/// The default values must be at the 0-th position in `columns`.
virtual void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t length, Arena * arena) const = 0;

/// Merges state (on which place points to) with other state of current aggregation function.
virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const = 0;

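A note on the contract above: addManyDefaults(place, columns, length, arena) must be equivalent to calling add(place, columns, 0, arena) length times, which is exactly what the generic fallback in the next hunk does (a loop of add() calls at row 0). A minimal sketch of a caller, with the helper name addDefaultRows made up for illustration:

/// Fold `n` rows that are known to hold the argument's default value into the
/// aggregation state with one virtual call instead of `n` calls to add().
void addDefaultRows(const DB::IAggregateFunction & func,
    DB::AggregateDataPtr place, const DB::IColumn ** columns, size_t n, DB::Arena * arena)
{
    /// Precondition from the interface comment: columns[i][0] holds the default value.
    func.addManyDefaults(place, columns, n, arena);
}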
@ -377,6 +381,16 @@ public:

AddFunc getAddressOfAddFunction() const override { return &addFree; }

void addManyDefaults(
AggregateDataPtr __restrict place,
const IColumn ** columns,
size_t length,
Arena * arena) const override
{
for (size_t i = 0; i < length; ++i)
static_cast<const Derived *>(this)->add(place, columns, 0, arena);
}

void addBatch( /// NOLINT
size_t row_begin,
size_t row_end,
@ -413,13 +427,9 @@ public:
{
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
const auto * values = &column_sparse.getValuesColumn();
auto offset_it = column_sparse.begin();
auto offset_it = column_sparse.getIterator(row_begin);

/// FIXME: make it more optimal
for (size_t i = 0; i < row_begin; ++i, ++offset_it)
;

for (size_t i = 0; i < row_end; ++i, ++offset_it)
for (size_t i = row_begin; i < row_end; ++i, ++offset_it)
static_cast<const Derived *>(this)->add(places[offset_it.getCurrentRow()] + place_offset,
&values, offset_it.getValueIndex(), arena);
}
@ -468,17 +478,16 @@ public:
const IColumn ** columns,
Arena * arena) const override
{
/// TODO: add values and defaults separately if order of adding isn't important.
const auto & column_sparse = assert_cast<const ColumnSparse &>(*columns[0]);
const auto * values = &column_sparse.getValuesColumn();
auto offset_it = column_sparse.begin();
const auto & offsets = column_sparse.getOffsetsData();

/// FIXME: make it more optimal
for (size_t i = 0; i < row_begin; ++i, ++offset_it)
;
auto from = std::lower_bound(offsets.begin(), offsets.end(), row_begin) - offsets.begin() + 1;
auto to = std::lower_bound(offsets.begin(), offsets.end(), row_end) - offsets.begin() + 1;

for (size_t i = 0; i < row_end; ++i, ++offset_it)
static_cast<const Derived *>(this)->add(place, &values, offset_it.getValueIndex(), arena);
size_t num_defaults = (row_end - row_begin) - (to - from);
static_cast<const Derived *>(this)->addBatchSinglePlace(from, to, place, &values, arena, -1);
static_cast<const Derived *>(this)->addManyDefaults(place, &values, num_defaults, arena);
}

void addBatchSinglePlaceNotNull( /// NOLINT
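To see what the lower_bound arithmetic above computes, take a hypothetical sparse column of 10 rows whose offsets are {2, 5, 7}: rows 2, 5 and 7 carry explicit values, stored at positions 1..3 of the values column (position 0 holds the shared default). For row_begin = 0 and row_end = 10 the numbers work out as follows:

/// offsets = {2, 5, 7}, row_begin = 0, row_end = 10
/// from = lower_bound(offsets, 0)  - begin + 1 = 0 + 1 = 1
/// to   = lower_bound(offsets, 10) - begin + 1 = 3 + 1 = 4
/// num_defaults = (10 - 0) - (4 - 1) = 7
/// addBatchSinglePlace(1, 4, ...) adds values[1..3], one per explicit row;
/// addManyDefaults(..., 7, ...) adds values[0] (the default) for the other 7 rows.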
@ -285,11 +285,11 @@ void ClientBase::setupSignalHandler()
sigemptyset(&new_act.sa_mask);
#else
if (sigemptyset(&new_act.sa_mask))
throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler.");
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
#endif

if (sigaction(SIGINT, &new_act, nullptr))
throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler.");
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
}

@ -492,7 +492,8 @@ try
String pager = config().getString("pager", "");
if (!pager.empty())
{
signal(SIGPIPE, SIG_IGN);
if (SIG_ERR == signal(SIGPIPE, SIG_IGN))
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);

ShellCommand::Config config(pager);
config.pipe_stdin_only = true;
@ -772,6 +772,14 @@ size_t ColumnSparse::getValueIndex(size_t n) const
return it - offsets_data.begin() + 1;
}

ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const
{
const auto & offsets_data = getOffsetsData();
const auto * it = std::lower_bound(offsets_data.begin(), offsets_data.end(), n);
size_t current_offset = it - offsets_data.begin();
return Iterator(offsets_data, _size, current_offset, n);
}

ColumnPtr recursiveRemoveSparse(const ColumnPtr & column)
{
if (!column)

@ -215,6 +215,7 @@ public:

Iterator begin() const { return Iterator(getOffsetsData(), _size, 0, 0); }
Iterator end() const { return Iterator(getOffsetsData(), _size, getOffsetsData().size(), _size); }
Iterator getIterator(size_t n) const;

private:
using Inserter = std::function<void(IColumn &)>;
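getIterator makes the linear skip seen earlier unnecessary: instead of starting at begin() and advancing row by row up to row_begin (the pattern removed in the sparse batch hunk above), a caller positions the iterator with one binary search over the offsets. A small sketch, where process() is a stand-in for the per-row work:

auto it = column_sparse.getIterator(row_begin);   /// jumps straight to row_begin
for (size_t i = row_begin; i < row_end; ++i, ++it)
    process(it.getCurrentRow(), it.getValueIndex());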
@ -638,10 +638,6 @@ void LRUFileCache::remove(const Key & key)
|
||||
|
||||
if (fs::exists(key_path))
|
||||
fs::remove(key_path);
|
||||
|
||||
#ifndef NDEBUG
|
||||
assertCacheCorrectness(cache_lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
void LRUFileCache::remove()
|
||||
@ -1053,8 +1049,7 @@ void LRUFileCache::assertCacheCellsCorrectness(
|
||||
if (file_segment->reserved_size != 0)
|
||||
{
|
||||
assert(cell.queue_iterator);
|
||||
/// FIXME: this is too slow, need to make it O(1)
|
||||
/// assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
|
||||
assert(queue.contains(file_segment->key(), file_segment->offset(), cache_lock));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -301,7 +301,7 @@ private:
|
||||
|
||||
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||
|
||||
static void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
|
||||
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
public:
|
||||
String dumpStructure(const Key & key_) override;
|
||||
|
@ -1,3 +1,5 @@
|
||||
// NOLINTBEGIN(readability-inconsistent-declaration-parameter-name)
|
||||
|
||||
#include <csignal>
|
||||
#include <sys/time.h>
|
||||
#if defined(OS_LINUX)
|
||||
@ -317,3 +319,5 @@ FOR_EACH_WRAPPED_FUNCTION(MAKE_WRAPPER)
|
||||
# undef MAKE_WRAPPER
|
||||
#endif
|
||||
}
|
||||
|
||||
// NOLINTEND(readability-inconsistent-declaration-parameter-name)
|
||||
|
@ -1,10 +1,8 @@
|
||||
#include "filesystemHelpers.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
#if defined(__linux__)
|
||||
# include <cstdio>
|
||||
# include <mntent.h>
|
||||
# include <sys/stat.h>
|
||||
# include <sys/sysmacros.h>
|
||||
#endif
|
||||
#include <cerrno>
|
||||
@ -13,6 +11,7 @@
|
||||
#include <filesystem>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <utime.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
|
@ -1166,12 +1166,12 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
|
||||
? list_watches
|
||||
: watches;
|
||||
|
||||
watches_type[zk_request->getPath()].emplace_back(session_id);
|
||||
watches_type[zk_request->getPath()].emplace(session_id);
|
||||
sessions_and_watchers[session_id].emplace(zk_request->getPath());
|
||||
}
|
||||
else if (response->error == Coordination::Error::ZNONODE && zk_request->getOpNum() == Coordination::OpNum::Exists)
|
||||
{
|
||||
watches[zk_request->getPath()].emplace_back(session_id);
|
||||
watches[zk_request->getPath()].emplace(session_id);
|
||||
sessions_and_watchers[session_id].emplace(zk_request->getPath());
|
||||
}
|
||||
}
|
||||
@ -1206,13 +1206,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id)
|
||||
if (watch != watches.end())
|
||||
{
|
||||
auto & watches_for_path = watch->second;
|
||||
for (auto w_it = watches_for_path.begin(); w_it != watches_for_path.end();)
|
||||
{
|
||||
if (*w_it == session_id)
|
||||
w_it = watches_for_path.erase(w_it);
|
||||
else
|
||||
++w_it;
|
||||
}
|
||||
watches_for_path.erase(session_id);
|
||||
if (watches_for_path.empty())
|
||||
watches.erase(watch);
|
||||
}
|
||||
@ -1222,13 +1216,7 @@ void KeeperStorage::clearDeadWatches(int64_t session_id)
|
||||
if (list_watch != list_watches.end())
|
||||
{
|
||||
auto & list_watches_for_path = list_watch->second;
|
||||
for (auto w_it = list_watches_for_path.begin(); w_it != list_watches_for_path.end();)
|
||||
{
|
||||
if (*w_it == session_id)
|
||||
w_it = list_watches_for_path.erase(w_it);
|
||||
else
|
||||
++w_it;
|
||||
}
|
||||
list_watches_for_path.erase(session_id);
|
||||
if (list_watches_for_path.empty())
|
||||
list_watches.erase(list_watch);
|
||||
}
|
||||
@ -1250,7 +1238,7 @@ void KeeperStorage::dumpWatches(WriteBufferFromOwnString & buf) const
|
||||
|
||||
void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
auto write_int_vec = [&buf](const std::vector<int64_t> & session_ids)
|
||||
auto write_int_container = [&buf](const auto & session_ids)
|
||||
{
|
||||
for (int64_t session_id : session_ids)
|
||||
{
|
||||
@ -1261,13 +1249,13 @@ void KeeperStorage::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
|
||||
for (const auto & [watch_path, sessions] : watches)
|
||||
{
|
||||
buf << watch_path << "\n";
|
||||
write_int_vec(sessions);
|
||||
write_int_container(sessions);
|
||||
}
|
||||
|
||||
for (const auto & [watch_path, sessions] : list_watches)
|
||||
{
|
||||
buf << watch_path << "\n";
|
||||
write_int_vec(sessions);
|
||||
write_int_container(sessions);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -96,7 +96,7 @@ public:
using Container = SnapshotableHashTable<Node>;
using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>;
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<std::string>>;
using SessionIDs = std::vector<int64_t>;
using SessionIDs = std::unordered_set<int64_t>;

/// Just vector of SHA1 from user:password
using AuthIDs = std::vector<AuthID>;
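Changing SessionIDs from std::vector to std::unordered_set is what permits the clearDeadWatches simplification earlier in this commit: registering the same session twice collapses into one element (hence emplace instead of emplace_back when adding watches), and removing a dead session becomes a single erase by key instead of the hand-written erase loop. Roughly:

KeeperStorage::SessionIDs watchers_for_path;
watchers_for_path.emplace(session_id);   /// duplicate registrations are a no-op
watchers_for_path.erase(session_id);     /// replaces the old iterate-and-erase loop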
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@ -7,6 +8,8 @@ namespace DB
|
||||
{
|
||||
|
||||
using ColumnNumbers = std::vector<size_t>;
|
||||
using ColumnNumbersSet = std::unordered_set<size_t>;
|
||||
using ColumnNumbersList = std::vector<ColumnNumbers>;
|
||||
using ColumnNumbersSetList = std::vector<ColumnNumbersSet>;
|
||||
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ using NameOrderedSet = std::set<std::string>;
|
||||
using NameToNameMap = std::unordered_map<std::string, std::string>;
|
||||
using NameToNameSetMap = std::unordered_map<std::string, NameSet>;
|
||||
using NameToNameVector = std::vector<std::pair<std::string, std::string>>;
|
||||
using NameToIndexMap = std::unordered_map<std::string, size_t>;
|
||||
|
||||
using NameWithAlias = std::pair<std::string, std::string>;
|
||||
using NamesWithAliases = std::vector<NameWithAlias>;
|
||||
|
@ -1,3 +1,4 @@
#include <cstddef>
#include <Core/NamesAndTypes.h>

#include <base/sort.h>
@ -214,4 +215,17 @@ std::optional<NameAndTypePair> NamesAndTypesList::tryGetByName(const std::string
}
return {};
}

size_t NamesAndTypesList::getPosByName(const std::string &name) const noexcept
{
size_t pos = 0;
for (const NameAndTypePair & column : *this)
{
if (column.name == name)
break;
++pos;
}
return pos;
}

}
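Note the convention of getPosByName above: when the name is not present, the loop runs off the end without breaking, so the returned position equals the number of columns (as the header comment below also states). Callers are therefore expected to compare the result with size(); a short usage sketch, assuming an existing NamesAndTypesList named columns:

size_t pos = columns.getPosByName("event_time");
if (pos == columns.size())
{
    /// not found: every element was visited without a match
}
else
{
    /// pos is the 0-based position of the column "event_time"
}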
@ -105,8 +105,11 @@ public:
/// Check that column contains in list
bool contains(const String & name) const;

/// Try to get column by name, return empty optional if column not found
/// Try to get column by name, returns empty optional if column not found
std::optional<NameAndTypePair> tryGetByName(const std::string & name) const;

/// Try to get column position by name, returns number of columns if column isn't found
size_t getPosByName(const std::string & name) const noexcept;
};

using NamesAndTypesLists = std::vector<NamesAndTypesList>;
@ -567,7 +567,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
\
|
||||
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
|
||||
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
|
||||
M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
|
||||
M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \
|
||||
M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \
|
||||
@ -674,6 +674,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
|
||||
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
|
||||
M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \
|
||||
M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in TSV format", 0) \
|
||||
\
|
||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
|
||||
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
#include <cmath>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -16,6 +18,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZE_OF_FIXED_STRING_DOESNT_MATCH;
|
||||
extern const int CANNOT_PARSE_BOOL;
|
||||
extern const int CANNOT_PARSE_NUMBER;
|
||||
}
|
||||
|
||||
|
||||
@ -176,27 +179,75 @@ UInt64 SettingFieldMaxThreads::getAuto()
return getNumberOfPhysicalCPUCores();
}

namespace
{
Poco::Timespan::TimeDiff float64AsSecondsToTimespan(Float64 d)
{
if (d != 0.0 && !std::isnormal(d))
throw Exception(
ErrorCodes::CANNOT_PARSE_NUMBER, "A setting's value in seconds must be a normal floating point number or zero. Got {}", d);
return static_cast<Poco::Timespan::TimeDiff>(d * 1000000);
}

template <SettingFieldTimespanUnit unit_>
|
||||
SettingFieldTimespan<unit_>::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber<UInt64>(f))
|
||||
}
|
||||
|
||||
template <>
|
||||
SettingFieldSeconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(float64AsSecondsToTimespan(fieldToNumber<Float64>(f)))
|
||||
{
|
||||
}
|
||||
|
||||
template <SettingFieldTimespanUnit unit_>
|
||||
SettingFieldTimespan<unit_> & SettingFieldTimespan<unit_>::operator=(const Field & f)
|
||||
template <>
|
||||
SettingFieldMilliseconds::SettingFieldTimespan(const Field & f) : SettingFieldTimespan(fieldToNumber<UInt64>(f))
|
||||
{
|
||||
}
|
||||
|
||||
template <>
|
||||
SettingFieldTimespan<SettingFieldTimespanUnit::Second> & SettingFieldSeconds::operator=(const Field & f)
|
||||
{
|
||||
*this = Poco::Timespan{float64AsSecondsToTimespan(fieldToNumber<Float64>(f))};
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <>
|
||||
SettingFieldTimespan<SettingFieldTimespanUnit::Millisecond> & SettingFieldMilliseconds::operator=(const Field & f)
|
||||
{
|
||||
*this = fieldToNumber<UInt64>(f);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <SettingFieldTimespanUnit unit_>
|
||||
String SettingFieldTimespan<unit_>::toString() const
|
||||
template <>
|
||||
String SettingFieldSeconds::toString() const
|
||||
{
|
||||
return ::DB::toString(static_cast<Float64>(value.totalMicroseconds()) / microseconds_per_unit);
|
||||
}
|
||||
|
||||
template <>
|
||||
String SettingFieldMilliseconds::toString() const
|
||||
{
|
||||
return ::DB::toString(operator UInt64());
|
||||
}
|
||||
|
||||
template <SettingFieldTimespanUnit unit_>
|
||||
void SettingFieldTimespan<unit_>::parseFromString(const String & str)
|
||||
template <>
|
||||
SettingFieldSeconds::operator Field() const
|
||||
{
|
||||
return static_cast<Float64>(value.totalMicroseconds()) / microseconds_per_unit;
|
||||
}
|
||||
|
||||
template <>
|
||||
SettingFieldMilliseconds::operator Field() const
|
||||
{
|
||||
return operator UInt64();
|
||||
}
|
||||
|
||||
template <>
|
||||
void SettingFieldSeconds::parseFromString(const String & str)
|
||||
{
|
||||
Float64 n = parse<Float64>(str.data(), str.size());
|
||||
*this = Poco::Timespan{static_cast<Poco::Timespan::TimeDiff>(n * microseconds_per_unit)};
|
||||
}
|
||||
|
||||
template <>
|
||||
void SettingFieldMilliseconds::parseFromString(const String & str)
|
||||
{
|
||||
*this = stringToNumber<UInt64>(str);
|
||||
}
|
||||
@ -204,6 +255,13 @@ void SettingFieldTimespan<unit_>::parseFromString(const String & str)
|
||||
template <SettingFieldTimespanUnit unit_>
|
||||
void SettingFieldTimespan<unit_>::writeBinary(WriteBuffer & out) const
|
||||
{
|
||||
/// Note that this returns an UInt64 (for both seconds and milliseconds units) for compatibility reasons as the value
|
||||
/// for seconds used to be a integer (now a Float64)
|
||||
/// This method is only used to communicate with clients or servers older than DBMS_MIN_REVISION_WITH_SETTINGS_SERIALIZED_AS_STRINGS
|
||||
/// in which the value was passed as binary (as a UInt64)
|
||||
/// Later versions pass the setting values as String (using toString() and parseFromString()) and there passing "1.2" will
|
||||
/// lead to `1` on releases with integer seconds or `1.2` on more recent releases
|
||||
/// See https://github.com/ClickHouse/ClickHouse/issues/36940 for more details
|
||||
auto num_units = operator UInt64();
|
||||
writeVarUInt(num_units, out);
|
||||
}
|
||||
|
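The practical effect of the Float64 path above is that seconds-typed settings now accept fractional values: float64AsSecondsToTimespan scales the number by 1000000, so the value is kept with microsecond precision, while writeBinary still sends a truncated whole number of units to peers older than the string-based setting serialization. A small sketch of the resulting behaviour:

SettingFieldSeconds timeout{Field(1.5)};           /// stored as 1'500'000 microseconds
String text = timeout.toString();                  /// "1.5" (Float64 representation)
UInt64 whole_units = static_cast<UInt64>(timeout); /// 1, what writeBinary() emits for old peers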
@ -124,7 +124,7 @@ struct SettingFieldTimespan
|
||||
operator std::chrono::duration<Rep, Period>() const { return std::chrono::duration_cast<std::chrono::duration<Rep, Period>>(std::chrono::microseconds(value.totalMicroseconds())); } /// NOLINT
|
||||
|
||||
explicit operator UInt64() const { return value.totalMicroseconds() / microseconds_per_unit; }
|
||||
explicit operator Field() const { return operator UInt64(); }
|
||||
explicit operator Field() const;
|
||||
|
||||
Poco::Timespan::TimeDiff totalMicroseconds() const { return value.totalMicroseconds(); }
|
||||
Poco::Timespan::TimeDiff totalMilliseconds() const { return value.totalMilliseconds(); }
|
||||
|
@ -222,11 +222,7 @@ struct SimpleSortCursor : SortCursorHelper<SimpleSortCursor>

bool ALWAYS_INLINE greaterAt(const SimpleSortCursor & rhs, size_t lhs_pos, size_t rhs_pos) const
{
const auto & desc = impl->desc[0];
int direction = desc.direction;
int nulls_direction = desc.nulls_direction;

bool result = false;
int res = 0;

#if USE_EMBEDDED_COMPILER
if (impl->desc.compiled_sort_description && rhs.impl->desc.compiled_sort_description)
@ -234,17 +230,23 @@ struct SimpleSortCursor : SortCursorHelper<SimpleSortCursor>
assert(impl->raw_sort_columns_data.size() == rhs.impl->raw_sort_columns_data.size());

auto sort_description_func_typed = reinterpret_cast<JITSortDescriptionFunc>(impl->desc.compiled_sort_description);
int jit_result = sort_description_func_typed(lhs_pos, rhs_pos, impl->raw_sort_columns_data.data(), rhs.impl->raw_sort_columns_data.data()); /// NOLINT
result = jit_result > 0;
res = sort_description_func_typed(lhs_pos, rhs_pos, impl->raw_sort_columns_data.data(), rhs.impl->raw_sort_columns_data.data()); /// NOLINT
}
else
#endif
{
int non_jit_result = impl->sort_columns[0]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[0]), nulls_direction);
result = (non_jit_result != 0 && ((non_jit_result > 0) == (direction > 0)));
const auto & desc = impl->desc[0];
int direction = desc.direction;
int nulls_direction = desc.nulls_direction;
res = direction * impl->sort_columns[0]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[0]), nulls_direction);
}

return result;
if (res > 0)
return true;
if (res < 0)
return false;

return impl->order > rhs.impl->order;
}
};

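The rewritten greaterAt above folds the sort direction into a single comparison result and, importantly, no longer treats equal keys as "not greater": ties are broken by the cursor's source order, which keeps k-way merges deterministic when two cursors sit on equal values. The decision it implements boils down to:

/// res > 0  -> this row sorts after rhs's row (greater)
/// res < 0  -> this row sorts before rhs's row (not greater)
/// res == 0 -> equal keys: the cursor with the larger source order wins
bool greater = res > 0 || (res == 0 && lhs_order > rhs_order);  /// lhs_order/rhs_order stand for impl->order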
@ -68,6 +68,14 @@
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_SET_SIGNAL_HANDLER;
|
||||
}
|
||||
}
|
||||
|
||||
DB::PipeFDs signal_pipe;
|
||||
|
||||
|
||||
@ -76,7 +84,8 @@ DB::PipeFDs signal_pipe;
|
||||
*/
|
||||
static void call_default_signal_handler(int sig)
|
||||
{
|
||||
signal(sig, SIG_DFL);
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
|
||||
raise(sig);
|
||||
}
|
||||
|
||||
@ -498,9 +507,8 @@ BaseDaemon::~BaseDaemon()
|
||||
signal_listener_thread.join();
|
||||
/// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after.
|
||||
for (int sig : handled_signals)
|
||||
{
|
||||
signal(sig, SIG_DFL);
|
||||
}
|
||||
if (SIG_ERR == signal(sig, SIG_DFL))
|
||||
DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
|
||||
signal_pipe.close();
|
||||
}
|
||||
|
||||
|
@ -89,7 +89,7 @@ bool DatabaseMySQL::empty() const
|
||||
return true;
|
||||
|
||||
for (const auto & [table_name, storage_info] : local_tables_cache)
|
||||
if (!remove_or_detach_tables.count(table_name))
|
||||
if (!remove_or_detach_tables.contains(table_name))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -103,7 +103,7 @@ DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_cont
|
||||
fetchTablesIntoLocalCache(local_context);
|
||||
|
||||
for (const auto & [table_name, modify_time_and_storage] : local_tables_cache)
|
||||
if (!remove_or_detach_tables.count(table_name) && (!filter_by_table_name || filter_by_table_name(table_name)))
|
||||
if (!remove_or_detach_tables.contains(table_name) && (!filter_by_table_name || filter_by_table_name(table_name)))
|
||||
tables[table_name] = modify_time_and_storage.second;
|
||||
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(tables, database_name);
|
||||
@ -120,7 +120,7 @@ StoragePtr DatabaseMySQL::tryGetTable(const String & mysql_table_name, ContextPt
|
||||
|
||||
fetchTablesIntoLocalCache(local_context);
|
||||
|
||||
if (!remove_or_detach_tables.count(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end())
|
||||
if (!remove_or_detach_tables.contains(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end())
|
||||
return local_tables_cache[mysql_table_name].second;
|
||||
|
||||
return StoragePtr{};
|
||||
@ -349,11 +349,11 @@ void DatabaseMySQL::attachTable(ContextPtr /* context_ */, const String & table_
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
|
||||
if (!local_tables_cache.count(table_name))
|
||||
if (!local_tables_cache.contains(table_name))
|
||||
throw Exception("Cannot attach table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) +
|
||||
" because it does not exist.", ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
if (!remove_or_detach_tables.count(table_name))
|
||||
if (!remove_or_detach_tables.contains(table_name))
|
||||
throw Exception("Cannot attach table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) +
|
||||
" because it already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
|
||||
|
||||
@ -372,11 +372,11 @@ StoragePtr DatabaseMySQL::detachTable(ContextPtr /* context */, const String & t
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{mutex};
|
||||
|
||||
if (remove_or_detach_tables.count(table_name))
|
||||
if (remove_or_detach_tables.contains(table_name))
|
||||
throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " is dropped",
|
||||
ErrorCodes::TABLE_IS_DROPPED);
|
||||
|
||||
if (!local_tables_cache.count(table_name))
|
||||
if (!local_tables_cache.contains(table_name))
|
||||
throw Exception("Table " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(table_name) + " doesn't exist.",
|
||||
ErrorCodes::UNKNOWN_TABLE);
|
||||
|
||||
@ -412,7 +412,7 @@ void DatabaseMySQL::detachTablePermanently(ContextPtr, const String & table_name
|
||||
|
||||
fs::path remove_flag = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix);
|
||||
|
||||
if (remove_or_detach_tables.count(table_name))
|
||||
if (remove_or_detach_tables.contains(table_name))
|
||||
throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name));
|
||||
|
||||
if (fs::exists(remove_flag))
|
||||
|
@ -643,7 +643,7 @@ void registerDictionaryFlat(DictionaryFactory & factory)
|
||||
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
|
||||
return std::make_unique<FlatDictionary>(dict_id, dict_struct, std::move(source_ptr), std::move(configuration));
|
||||
return std::make_unique<FlatDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration);
|
||||
};
|
||||
|
||||
factory.registerLayout("flat", create_layout, false);
|
||||
|
@ -16,6 +16,13 @@
|
||||
#include <Storages/ExternalDataSourceConfiguration.h>
|
||||
#include <Storages/MySQL/MySQLHelpers.h>
|
||||
#include <Storages/MySQL/MySQLSettings.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/LocalDateTime.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include "readInvalidateQuery.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -118,15 +125,6 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
|
||||
|
||||
#if USE_MYSQL
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
# include <IO/WriteBufferFromString.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <Common/LocalDateTime.h>
|
||||
# include <Common/logger_useful.h>
|
||||
# include "readInvalidateQuery.h"
|
||||
# include <mysqlxx/Exception.h>
|
||||
# include <Core/Settings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -104,7 +104,7 @@ ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
|
||||
|
||||
/// Cast range column to storage type
|
||||
Columns modified_key_columns = key_columns;
|
||||
auto range_storage_column = key_columns.back();
|
||||
const ColumnPtr & range_storage_column = key_columns.back();
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
|
||||
modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type);
|
||||
|
||||
@ -314,7 +314,7 @@ ColumnUInt8::Ptr RangeHashedDictionary<dictionary_key_type>::hasKeys(const Colum
|
||||
}
|
||||
|
||||
/// Cast range column to storage type
|
||||
auto range_storage_column = key_columns.back();
|
||||
const ColumnPtr & range_storage_column = key_columns.back();
|
||||
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
|
||||
auto range_column_updated = castColumnAccurate(column_to_cast, dict_struct.range_min->type);
|
||||
auto key_columns_copy = key_columns;
|
||||
@ -513,7 +513,7 @@ void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
auto range_column = key_columns.back();
|
||||
const ColumnPtr & range_column = key_columns.back();
|
||||
auto key_columns_copy = key_columns;
|
||||
key_columns_copy.pop_back();
|
||||
|
||||
@ -984,7 +984,7 @@ Pipe RangeHashedDictionary<dictionary_key_type>::read(const Names & column_names
|
||||
Columns result;
|
||||
result.reserve(attribute_names_size);
|
||||
|
||||
auto key_column = key_columns.back();
|
||||
const ColumnPtr & key_column = key_columns.back();
|
||||
|
||||
const auto * key_to_index_column = typeid_cast<const ColumnUInt64 *>(key_column.get());
|
||||
if (!key_to_index_column)
|
||||
|
@ -27,6 +27,8 @@ namespace ErrorCodes
|
||||
extern const int FILE_ALREADY_EXISTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int BAD_FILE_TYPE;
|
||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
}
|
||||
|
||||
static String revisionToString(UInt64 revision)
|
||||
@ -122,10 +124,10 @@ DiskObjectStorage::Metadata DiskObjectStorage::readUpdateAndStoreMetadata(const
|
||||
}
|
||||
|
||||
|
||||
DiskObjectStorage::Metadata DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
|
||||
void DiskObjectStorage::readUpdateStoreMetadataAndRemove(const String & path, bool sync, DiskObjectStorage::MetadataUpdater updater)
|
||||
{
|
||||
std::unique_lock lock(metadata_mutex);
|
||||
return Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
|
||||
Metadata::readUpdateStoreMetadataAndRemove(remote_fs_root_path, metadata_disk, path, sync, updater);
|
||||
}
|
||||
|
||||
DiskObjectStorage::Metadata DiskObjectStorage::readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, DiskObjectStorage::MetadataUpdater updater)
|
||||
@ -174,8 +176,13 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST)
|
||||
/// Unfortunately in rare cases it can happen when files disappear
|
||||
/// or can be empty in case of operation interruption (like cancelled metadata fetch)
|
||||
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST ||
|
||||
e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF ||
|
||||
e.code() == ErrorCodes::CANNOT_READ_ALL_DATA)
|
||||
return;
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -186,6 +193,15 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
|
||||
{
|
||||
it = iterateDirectory(local_path);
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
/// Unfortunately in rare cases it can happen when files disappear
|
||||
/// or can be empty in case of operation interruption (like cancelled metadata fetch)
|
||||
if (e.code() == ErrorCodes::FILE_DOESNT_EXIST ||
|
||||
e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF ||
|
||||
e.code() == ErrorCodes::CANNOT_READ_ALL_DATA)
|
||||
return;
|
||||
}
|
||||
catch (const fs::filesystem_error & e)
|
||||
{
|
||||
if (e.code() == std::errc::no_such_file_or_directory)
|
||||
@ -237,7 +253,10 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat
|
||||
metadata_helper->createFileOperationObject("rename", revision, object_metadata);
|
||||
}
|
||||
|
||||
metadata_disk->moveFile(from_path, to_path);
|
||||
{
|
||||
std::unique_lock lock(metadata_mutex);
|
||||
metadata_disk->moveFile(from_path, to_path);
|
||||
}
|
||||
}
|
||||
|
||||
void DiskObjectStorage::moveFile(const String & from_path, const String & to_path)
|
||||
@ -449,6 +468,8 @@ void DiskObjectStorage::removeMetadata(const String & path, std::vector<String>
|
||||
LOG_WARNING(log,
|
||||
"Metadata file {} can't be read by reason: {}. Removing it forcibly.",
|
||||
backQuote(path), e.nested() ? e.nested()->message() : e.message());
|
||||
|
||||
std::unique_lock lock(metadata_mutex);
|
||||
metadata_disk->removeFile(path);
|
||||
}
|
||||
else
|
||||
|
@ -65,7 +65,7 @@ public:
|
||||
Metadata readMetadata(const String & path) const;
|
||||
Metadata readMetadataUnlocked(const String & path, std::shared_lock<std::shared_mutex> &) const;
|
||||
Metadata readUpdateAndStoreMetadata(const String & path, bool sync, MetadataUpdater updater);
|
||||
Metadata readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
|
||||
void readUpdateStoreMetadataAndRemove(const String & path, bool sync, MetadataUpdater updater);
|
||||
|
||||
Metadata readOrCreateUpdateAndStoreMetadata(const String & path, WriteMode mode, bool sync, MetadataUpdater updater);
|
||||
|
||||
|
@ -10,7 +10,10 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_FORMAT;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
extern const int MEMORY_LIMIT_EXCEEDED;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int CANNOT_OPEN_FILE;
|
||||
}
|
||||
|
||||
DiskObjectStorageMetadata DiskObjectStorageMetadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_)
|
||||
@ -45,16 +48,38 @@ DiskObjectStorageMetadata DiskObjectStorageMetadata::createUpdateAndStoreMetadat
|
||||
return result;
|
||||
}
|
||||
|
||||
DiskObjectStorageMetadata DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater)
|
||||
void DiskObjectStorageMetadata::readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, DiskObjectStorageMetadataUpdater updater)
|
||||
{
|
||||
DiskObjectStorageMetadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
|
||||
result.load();
|
||||
if (updater(result))
|
||||
result.save(sync);
|
||||
metadata_disk_->removeFile(metadata_file_path_);
|
||||
/// Very often we are deleting metadata from some unfinished operation (like fetch of metadata)
|
||||
/// in this case metadata file can be incomplete/empty and so on. It's ok to remove it in this case
|
||||
/// because we cannot do anything better.
|
||||
try
|
||||
{
|
||||
DiskObjectStorageMetadata metadata(remote_fs_root_path_, metadata_disk_, metadata_file_path_);
|
||||
metadata.load();
|
||||
if (updater(metadata))
|
||||
metadata.save(sync);
|
||||
|
||||
return result;
|
||||
metadata_disk_->removeFile(metadata_file_path_);
|
||||
}
|
||||
catch (Exception & ex)
|
||||
{
|
||||
/// If we have some broken half-empty file just remove it
|
||||
if (ex.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF
|
||||
|| ex.code() == ErrorCodes::CANNOT_READ_ALL_DATA
|
||||
|| ex.code() == ErrorCodes::CANNOT_OPEN_FILE)
|
||||
{
|
||||
LOG_INFO(&Poco::Logger::get("ObjectStorageMetadata"), "Failed to read metadata file {} before removal because it's incomplete or empty. "
|
||||
"It's Ok and can happen after operation interruption (like metadata fetch), so removing as is", metadata_file_path_);
|
||||
metadata_disk_->removeFile(metadata_file_path_);
|
||||
}
|
||||
|
||||
/// If file already removed, than nothing to do
|
||||
if (ex.code() == ErrorCodes::FILE_DOESNT_EXIST)
|
||||
return;
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNotExists(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, bool overwrite)
|
||||
@ -74,70 +99,55 @@ DiskObjectStorageMetadata DiskObjectStorageMetadata::createAndStoreMetadataIfNot
|
||||
|
||||
void DiskObjectStorageMetadata::load()
|
||||
{
|
||||
try
|
||||
const ReadSettings read_settings;
|
||||
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
|
||||
|
||||
UInt32 version;
|
||||
readIntText(version, *buf);
|
||||
|
||||
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_FORMAT,
|
||||
"Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
|
||||
metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG));
|
||||
|
||||
assertChar('\n', *buf);
|
||||
|
||||
UInt32 remote_fs_objects_count;
|
||||
readIntText(remote_fs_objects_count, *buf);
|
||||
assertChar('\t', *buf);
|
||||
readIntText(total_size, *buf);
|
||||
assertChar('\n', *buf);
|
||||
remote_fs_objects.resize(remote_fs_objects_count);
|
||||
|
||||
for (size_t i = 0; i < remote_fs_objects_count; ++i)
|
||||
{
|
||||
const ReadSettings read_settings;
|
||||
auto buf = metadata_disk->readFile(metadata_file_path, read_settings, 1024); /* reasonable buffer size for small file */
|
||||
|
||||
UInt32 version;
|
||||
readIntText(version, *buf);
|
||||
|
||||
if (version < VERSION_ABSOLUTE_PATHS || version > VERSION_READ_ONLY_FLAG)
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_FORMAT,
|
||||
"Unknown metadata file version. Path: {}. Version: {}. Maximum expected version: {}",
|
||||
metadata_disk->getPath() + metadata_file_path, toString(version), toString(VERSION_READ_ONLY_FLAG));
|
||||
|
||||
assertChar('\n', *buf);
|
||||
|
||||
UInt32 remote_fs_objects_count;
|
||||
readIntText(remote_fs_objects_count, *buf);
|
||||
String remote_fs_object_path;
|
||||
size_t remote_fs_object_size;
|
||||
readIntText(remote_fs_object_size, *buf);
|
||||
assertChar('\t', *buf);
|
||||
readIntText(total_size, *buf);
|
||||
assertChar('\n', *buf);
|
||||
remote_fs_objects.resize(remote_fs_objects_count);
|
||||
|
||||
for (size_t i = 0; i < remote_fs_objects_count; ++i)
|
||||
readEscapedString(remote_fs_object_path, *buf);
|
||||
if (version == VERSION_ABSOLUTE_PATHS)
|
||||
{
|
||||
String remote_fs_object_path;
|
||||
size_t remote_fs_object_size;
|
||||
readIntText(remote_fs_object_size, *buf);
|
||||
assertChar('\t', *buf);
|
||||
readEscapedString(remote_fs_object_path, *buf);
|
||||
if (version == VERSION_ABSOLUTE_PATHS)
|
||||
{
|
||||
if (!remote_fs_object_path.starts_with(remote_fs_root_path))
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT,
|
||||
"Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}",
|
||||
remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath());
|
||||
if (!remote_fs_object_path.starts_with(remote_fs_root_path))
|
||||
throw Exception(ErrorCodes::UNKNOWN_FORMAT,
|
||||
"Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}",
|
||||
remote_fs_object_path, remote_fs_root_path, metadata_disk->getPath());
|
||||
|
||||
remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size());
|
||||
}
|
||||
assertChar('\n', *buf);
|
||||
remote_fs_objects[i].relative_path = remote_fs_object_path;
|
||||
remote_fs_objects[i].bytes_size = remote_fs_object_size;
|
||||
remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size());
|
||||
}
|
||||
|
||||
readIntText(ref_count, *buf);
|
||||
assertChar('\n', *buf);
|
||||
|
||||
if (version >= VERSION_READ_ONLY_FLAG)
|
||||
{
|
||||
readBoolText(read_only, *buf);
|
||||
assertChar('\n', *buf);
|
||||
}
|
||||
remote_fs_objects[i].relative_path = remote_fs_object_path;
|
||||
remote_fs_objects[i].bytes_size = remote_fs_object_size;
|
||||
}
|
||||
catch (Exception & e)
|
||||
|
||||
readIntText(ref_count, *buf);
|
||||
assertChar('\n', *buf);
|
||||
|
||||
if (version >= VERSION_READ_ONLY_FLAG)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
|
||||
throw;
|
||||
|
||||
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
|
||||
throw;
|
||||
|
||||
throw Exception("Failed to read metadata file: " + metadata_file_path, ErrorCodes::UNKNOWN_FORMAT);
|
||||
readBoolText(read_only, *buf);
|
||||
assertChar('\n', *buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ struct DiskObjectStorageMetadata
|
||||
|
||||
static DiskObjectStorageMetadata readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_);
|
||||
static DiskObjectStorageMetadata readUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
|
||||
static DiskObjectStorageMetadata readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
|
||||
static void readUpdateStoreMetadataAndRemove(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
|
||||
|
||||
static DiskObjectStorageMetadata createAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync);
|
||||
static DiskObjectStorageMetadata createUpdateAndStoreMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_, bool sync, Updater updater);
|
||||
|
@ -66,6 +66,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.null_representation = settings.format_csv_null_representation;
|
||||
format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
|
||||
format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference;
|
||||
format_settings.csv.skip_first_lines = settings.input_format_csv_skip_first_lines;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
@ -123,6 +124,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
|
||||
format_settings.tsv.null_representation = settings.format_tsv_null_representation;
|
||||
format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference;
|
||||
format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
|
||||
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
|
||||
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
|
||||
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
|
||||
@ -492,10 +494,9 @@ String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_
|
||||
String FormatFactory::getFormatFromFileDescriptor(int fd)
|
||||
{
|
||||
#ifdef OS_LINUX
|
||||
char buf[32] = {'\0'};
|
||||
snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd);
|
||||
std::string proc_path = fmt::format("/proc/self/fd/{}", fd);
|
||||
char file_path[PATH_MAX] = {'\0'};
|
||||
if (readlink(buf, file_path, sizeof(file_path) - 1) != -1)
|
||||
if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1)
|
||||
return getFormatFromFileName(file_path, false);
|
||||
return "";
|
||||
#elif defined(__APPLE__)
|
||||
@ -585,6 +586,13 @@ bool FormatFactory::checkIfFormatHasAnySchemaReader(const String & name) const
|
||||
return checkIfFormatHasSchemaReader(name) || checkIfFormatHasExternalSchemaReader(name);
|
||||
}
|
||||
|
||||
void FormatFactory::checkFormatName(const String & name) const
|
||||
{
|
||||
auto it = dict.find(name);
|
||||
if (it == dict.end())
|
||||
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
|
||||
}
|
||||
|
||||
FormatFactory & FormatFactory::instance()
|
||||
{
|
||||
static FormatFactory ret;
|
||||
|
@ -210,6 +210,9 @@ public:
|
||||
bool isInputFormat(const String & name) const;
|
||||
bool isOutputFormat(const String & name) const;
|
||||
|
||||
/// Check that format with specified name exists and throw an exception otherwise.
|
||||
void checkFormatName(const String & name) const;
|
||||
|
||||
private:
|
||||
FormatsDictionary dict;
|
||||
FileExtensionFormats file_extension_formats;
|
||||
|
@ -109,6 +109,7 @@ struct FormatSettings
|
||||
String null_representation = "\\N";
|
||||
char tuple_delimiter = ',';
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
UInt64 skip_first_lines = 0;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
@ -219,6 +220,7 @@ struct FormatSettings
|
||||
String null_representation = "\\N";
|
||||
bool input_format_enum_as_number = false;
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
UInt64 skip_first_lines = 0;
|
||||
} tsv;
|
||||
|
||||
struct
|
||||
|
@ -118,7 +118,7 @@ struct CRCFunctionWrapper
|
||||
private:
|
||||
static ReturnType doCRC(const ColumnString::Chars & buf, size_t offset, size_t size)
|
||||
{
|
||||
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
|
||||
const unsigned char * p = reinterpret_cast<const unsigned char *>(buf.data()) + offset;
|
||||
return Impl::makeCRC(p, size);
|
||||
}
|
||||
};
|
||||
|
12
src/Functions/FunctionShowCertificate.cpp
Normal file
12
src/Functions/FunctionShowCertificate.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include "FunctionShowCertificate.h"
|
||||
#include <Functions/FunctionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerFunctionShowCertificate(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionShowCertificate>();
|
||||
}
|
||||
|
||||
}
|
189
src/Functions/FunctionShowCertificate.h
Normal file
189
src/Functions/FunctionShowCertificate.h
Normal file
@ -0,0 +1,189 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#if USE_SSL
|
||||
#include <openssl/x509v3.h>
|
||||
#include "Poco/Net/SSLManager.h"
|
||||
#include "Poco/Crypto/X509Certificate.h"
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
// showCertificate()
|
||||
class FunctionShowCertificate : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "showCertificate";
|
||||
|
||||
static FunctionPtr create(ContextPtr)
|
||||
{
|
||||
#if !defined(USE_SSL) || USE_SSL == 0
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support is disabled");
|
||||
#endif
|
||||
return std::make_shared<FunctionShowCertificate>();
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
MutableColumnPtr keys = DataTypeString().createColumn();
|
||||
MutableColumnPtr values = DataTypeString().createColumn();
|
||||
MutableColumnPtr offsets = DataTypeNumber<IColumn::Offset>().createColumn();
|
||||
|
||||
if (input_rows_count)
|
||||
{
|
||||
#if USE_SSL
|
||||
if (const X509 * cert = SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext()))
|
||||
{
|
||||
BIO * b = BIO_new(BIO_s_mem());
|
||||
SCOPE_EXIT(
|
||||
{
|
||||
BIO_free(b);
|
||||
});
|
||||
|
||||
keys->insert("version");
|
||||
values->insert(std::to_string(X509_get_version(cert) + 1));
|
||||
|
||||
{
|
||||
char buf[1024] = {0};
|
||||
const ASN1_INTEGER * sn = cert->cert_info->serialNumber;
|
||||
BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr);
|
||||
SCOPE_EXIT(
|
||||
{
|
||||
BN_free(bnsn);
|
||||
});
|
||||
if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0)
|
||||
{
|
||||
keys->insert("serial_number");
|
||||
values->insert(buf);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
const ASN1_BIT_STRING *sig = nullptr;
|
||||
const X509_ALGOR *al = nullptr;
|
||||
char buf[1024] = {0};
|
||||
X509_get0_signature(&sig, &al, cert);
|
||||
if (al)
|
||||
{
|
||||
OBJ_obj2txt(buf, sizeof(buf), al->algorithm, 0);
|
||||
keys->insert("signature_algo");
|
||||
values->insert(buf);
|
||||
}
|
||||
}
|
||||
|
||||
char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0);
|
||||
if (issuer)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
{
|
||||
OPENSSL_free(issuer);
|
||||
});
|
||||
keys->insert("issuer");
|
||||
values->insert(issuer);
|
||||
}
|
||||
|
||||
{
|
||||
char buf[1024] = {0};
|
||||
if (ASN1_TIME_print(b, X509_get_notBefore(cert)) && BIO_read(b, buf, sizeof(buf)) > 0)
|
||||
{
|
||||
keys->insert("not_before");
|
||||
values->insert(buf);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
char buf[1024] = {0};
|
||||
if (ASN1_TIME_print(b, X509_get_notAfter(cert)) && BIO_read(b, buf, sizeof(buf)) > 0)
|
||||
{
|
||||
keys->insert("not_after");
|
||||
values->insert(buf);
|
||||
}
|
||||
}
|
||||
|
||||
char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0);
|
||||
if (subject)
|
||||
{
|
||||
SCOPE_EXIT(
|
||||
{
|
||||
OPENSSL_free(subject);
|
||||
});
|
||||
keys->insert("subject");
|
||||
values->insert(subject);
|
||||
}
|
||||
|
||||
if (X509_PUBKEY * pkey = X509_get_X509_PUBKEY(cert))
|
||||
{
|
||||
char buf[1024] = {0};
|
||||
ASN1_OBJECT *ppkalg = nullptr;
|
||||
const unsigned char *pk = nullptr;
|
||||
int ppklen = 0;
|
||||
X509_ALGOR *pa = nullptr;
|
||||
if (X509_PUBKEY_get0_param(&ppkalg, &pk, &ppklen, &pa, pkey) &&
|
||||
i2a_ASN1_OBJECT(b, ppkalg) > 0 && BIO_read(b, buf, sizeof(buf)) > 0)
|
||||
{
|
||||
keys->insert("pkey_algo");
|
||||
values->insert(buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
offsets->insert(keys->size());
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t sz = keys->size();
|
||||
|
||||
if (sz && input_rows_count > 1)
|
||||
{
|
||||
keys->reserve(sz * input_rows_count);
|
||||
values->reserve(sz * input_rows_count);
|
||||
offsets->reserve(input_rows_count);
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < input_rows_count; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sz; ++j)
|
||||
{
|
||||
keys->insertFrom(*keys, j);
|
||||
values->insertFrom(*values, j);
|
||||
}
|
||||
offsets->insert(keys->size());
|
||||
}
|
||||
|
||||
auto nested_column = ColumnArray::create(
|
||||
ColumnTuple::create(Columns{std::move(keys), std::move(values)}), std::move(offsets));
|
||||
|
||||
return ColumnMap::create(nested_column);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -91,7 +91,7 @@ struct HexImpl
|
||||
out_vec.resize(size * hex_length);
|
||||
|
||||
size_t pos = 0;
|
||||
char * out = reinterpret_cast<char *>(&out_vec[0]);
|
||||
char * out = reinterpret_cast<char *>(out_vec.data());
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
|
||||
|
@ -123,7 +123,7 @@ public:
|
||||
bool document_ok = false;
|
||||
if (col_json_const)
|
||||
{
|
||||
std::string_view json{reinterpret_cast<const char *>(&chars[0]), offsets[0] - 1};
|
||||
std::string_view json{reinterpret_cast<const char *>(chars.data()), offsets[0] - 1};
|
||||
document_ok = parser.parse(json, document);
|
||||
}
|
||||
|
||||
|
@ -91,7 +91,7 @@ private:
|
||||
|
||||
static UInt16 extractPort(UInt16 default_port, const ColumnString::Chars & buf, size_t offset, size_t size)
|
||||
{
|
||||
const char * p = reinterpret_cast<const char *>(&buf[0]) + offset;
|
||||
const char * p = reinterpret_cast<const char *>(buf.data()) + offset;
|
||||
const char * end = p + size;
|
||||
|
||||
StringRef host = getURLHost(p, size);
|
||||
@ -127,4 +127,3 @@ void registerFunctionPort(FunctionFactory & factory)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -112,7 +112,6 @@ public:

auto is_not_null = FunctionFactory::instance().get("isNotNull", context);
auto assume_not_null = FunctionFactory::instance().get("assumeNotNull", context);
auto multi_if = FunctionFactory::instance().get("multiIf", context);

ColumnsWithTypeAndName multi_if_args;
ColumnsWithTypeAndName tmp_args(1);
@ -144,7 +143,16 @@ public:
if (multi_if_args.size() == 1)
return multi_if_args.front().column;

ColumnPtr res = multi_if->build(multi_if_args)->execute(multi_if_args, result_type, input_rows_count);
/// If there were only two arguments (3 arguments passed to multiIf),
/// use function "if" instead, because it is implemented more efficiently.
/// TODO: make "multiIf" equally efficient.
FunctionOverloadResolverPtr if_function;
if (multi_if_args.size() == 3)
if_function = FunctionFactory::instance().get("if", context);
else
if_function = FunctionFactory::instance().get("multiIf", context);

ColumnPtr res = if_function->build(multi_if_args)->execute(multi_if_args, result_type, input_rows_count);

/// if last argument is not nullable, result should be also not nullable
if (!multi_if_args.back().column->isNullable() && res->isNullable())
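In other words, a two-argument coalesce(a, b) builds exactly three multiIf arguments and is now dispatched to the binary "if" combinator, while longer argument lists keep using multiIf. An illustration of the rewrite (pseudo-SQL in comments, not literal generated code):

/// coalesce(a, b)    -> if(isNotNull(a), assumeNotNull(a), b)
/// coalesce(a, b, c) -> multiIf(isNotNull(a), assumeNotNull(a),
///                              isNotNull(b), assumeNotNull(b), c)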
165
src/Functions/grouping.h
Normal file
@ -0,0 +1,165 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class FunctionGroupingBase : public IFunction
|
||||
{
|
||||
protected:
|
||||
static constexpr UInt64 ONE = 1;
|
||||
|
||||
const ColumnNumbers arguments_indexes;
|
||||
|
||||
public:
|
||||
FunctionGroupingBase(ColumnNumbers arguments_indexes_)
|
||||
: arguments_indexes(std::move(arguments_indexes_))
|
||||
{}
|
||||
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
|
||||
bool isSuitableForConstantFolding() const override { return false; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
|
||||
{
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
template <typename AggregationKeyChecker>
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count, AggregationKeyChecker checker) const
|
||||
{
|
||||
const auto * grouping_set_column = checkAndGetColumn<ColumnUInt64>(arguments[0].column.get());
|
||||
|
||||
auto result = ColumnUInt64::create();
|
||||
auto & result_data = result->getData();
|
||||
result_data.reserve(input_rows_count);
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
UInt64 set_index = grouping_set_column->getElement(i);
|
||||
|
||||
UInt64 value = 0;
|
||||
for (auto index : arguments_indexes)
|
||||
value = (value << 1) + (checker(set_index, index) ? 1 : 0);
|
||||
|
||||
result_data.push_back(value);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionGroupingOrdinary : public FunctionGroupingBase
|
||||
{
|
||||
public:
|
||||
explicit FunctionGroupingOrdinary(ColumnNumbers arguments_indexes_)
|
||||
: FunctionGroupingBase(std::move(arguments_indexes_))
|
||||
{}
|
||||
|
||||
String getName() const override { return "groupingOrdinary"; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
UInt64 value = (ONE << arguments_indexes.size()) - 1;
|
||||
return ColumnUInt64::create(input_rows_count, value);
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionGroupingForRollup : public FunctionGroupingBase
|
||||
{
|
||||
const UInt64 aggregation_keys_number;
|
||||
|
||||
public:
|
||||
FunctionGroupingForRollup(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_)
|
||||
: FunctionGroupingBase(std::move(arguments_indexes_))
|
||||
, aggregation_keys_number(aggregation_keys_number_)
|
||||
{}
|
||||
|
||||
String getName() const override { return "groupingForRollup"; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
return FunctionGroupingBase::executeImpl(arguments, input_rows_count,
|
||||
[this](UInt64 set_index, UInt64 arg_index)
|
||||
{
|
||||
// For ROLLUP(a, b, c) there will be the following grouping set indexes:
|
||||
// | GROUPING SET | INDEX |
|
||||
// | (a, b, c) | 0 |
|
||||
// | (a, b) | 1 |
|
||||
// | (a) | 2 |
|
||||
// | () | 3 |
|
||||
return arg_index < aggregation_keys_number - set_index;
|
||||
}
|
||||
);
|
||||
}
|
||||
};
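To make the index table above concrete, here is a small standalone sketch (not part of the patch) that replays the ROLLUP checker for ROLLUP(a, b, c) with GROUPING(a, b, c), i.e. arguments_indexes = {0, 1, 2}:

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t keys = 3; // aggregation_keys_number for ROLLUP(a, b, c)
    for (uint64_t set_index = 0; set_index <= keys; ++set_index)
    {
        uint64_t value = 0;
        for (uint64_t arg_index = 0; arg_index < keys; ++arg_index)
            value = (value << 1) + (arg_index < keys - set_index ? 1 : 0);
        // Prints 7, 6, 4, 0: bits drop from the right as keys leave the set.
        std::printf("set %llu -> grouping = %llu\n",
                    static_cast<unsigned long long>(set_index),
                    static_cast<unsigned long long>(value));
    }
}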
|
||||
|
||||
class FunctionGroupingForCube : public FunctionGroupingBase
|
||||
{
|
||||
const UInt64 aggregation_keys_number;
|
||||
|
||||
public:
|
||||
|
||||
FunctionGroupingForCube(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_)
|
||||
: FunctionGroupingBase(arguments_indexes_)
|
||||
, aggregation_keys_number(aggregation_keys_number_)
|
||||
{}
|
||||
|
||||
String getName() const override { return "groupingForCube"; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
return FunctionGroupingBase::executeImpl(arguments, input_rows_count,
|
||||
[this](UInt64 set_index, UInt64 arg_index)
|
||||
{
|
||||
// For CUBE(a, b) there will be the following grouping set indexes:
|
||||
// | GROUPING SET | INDEX |
|
||||
// | (a, b) | 0 |
|
||||
// | (a) | 1 |
|
||||
// | (b) | 2 |
|
||||
// | () | 3 |
|
||||
auto set_mask = (ONE << aggregation_keys_number) - 1 - set_index;
|
||||
return set_mask & (ONE << (aggregation_keys_number - arg_index - 1));
|
||||
}
|
||||
);
|
||||
}
|
||||
};
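The CUBE variant works on a complemented bit mask instead: the set index is the bitwise complement of the mask of keys present in the set. The same kind of standalone sketch (not part of the patch) for CUBE(a, b) with GROUPING(a, b):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t keys = 2; // aggregation_keys_number for CUBE(a, b)
    for (uint64_t set_index = 0; set_index < (1ULL << keys); ++set_index)
    {
        uint64_t value = 0;
        for (uint64_t arg_index = 0; arg_index < keys; ++arg_index)
        {
            uint64_t set_mask = (1ULL << keys) - 1 - set_index;
            bool in_set = set_mask & (1ULL << (keys - arg_index - 1));
            value = (value << 1) + (in_set ? 1 : 0);
        }
        // Prints 3, 2, 1, 0 for the sets (a, b), (a), (b), ().
        std::printf("set %llu -> grouping = %llu\n",
                    static_cast<unsigned long long>(set_index),
                    static_cast<unsigned long long>(value));
    }
}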
|
||||
|
||||
class FunctionGroupingForGroupingSets : public FunctionGroupingBase
|
||||
{
|
||||
ColumnNumbersSetList grouping_sets;
|
||||
public:
|
||||
FunctionGroupingForGroupingSets(ColumnNumbers arguments_indexes_, ColumnNumbersList const & grouping_sets_)
|
||||
: FunctionGroupingBase(std::move(arguments_indexes_))
|
||||
{
|
||||
for (auto const & set : grouping_sets_)
|
||||
grouping_sets.emplace_back(set.begin(), set.end());
|
||||
}
|
||||
|
||||
String getName() const override { return "groupingForGroupingSets"; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
return FunctionGroupingBase::executeImpl(arguments, input_rows_count,
|
||||
[this](UInt64 set_index, UInt64 arg_index)
|
||||
{
|
||||
return grouping_sets[set_index].contains(arg_index);
|
||||
}
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -167,7 +167,7 @@ public:
|
||||
|
||||
const auto & tuple_columns = tuple_col->getColumns();
|
||||
|
||||
const ColumnWithTypeAndName poly = arguments[1];
|
||||
const ColumnWithTypeAndName & poly = arguments[1];
|
||||
const IColumn * poly_col = poly.column.get();
|
||||
const ColumnConst * const_poly_col = checkAndGetColumn<ColumnConst>(poly_col);
|
||||
|
||||
|
@ -68,7 +68,7 @@ void registerFunctionEncrypt(FunctionFactory & factory);
|
||||
void registerFunctionDecrypt(FunctionFactory & factory);
|
||||
void registerFunctionAESEncryptMysql(FunctionFactory & factory);
|
||||
void registerFunctionAESDecryptMysql(FunctionFactory & factory);
|
||||
|
||||
void registerFunctionShowCertificate(FunctionFactory &);
|
||||
#endif
|
||||
|
||||
void registerFunctions()
|
||||
@ -135,6 +135,7 @@ void registerFunctions()
|
||||
registerFunctionDecrypt(factory);
|
||||
registerFunctionAESEncryptMysql(factory);
|
||||
registerFunctionAESDecryptMysql(factory);
|
||||
registerFunctionShowCertificate(factory);
|
||||
#endif
|
||||
registerFunctionTid(factory);
|
||||
registerFunctionLogTrace(factory);
|
||||
|
@ -1,6 +1,12 @@
|
||||
#include <memory>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
|
||||
#include <Functions/grouping.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsMiscellaneous.h>
|
||||
|
||||
@ -8,10 +14,12 @@
|
||||
|
||||
#include <DataTypes/DataTypeSet.h>
|
||||
#include <DataTypes/DataTypeFunction.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/FieldToDataType.h>
|
||||
|
||||
#include <Columns/ColumnSet.h>
|
||||
@ -56,6 +64,9 @@ namespace ErrorCodes
|
||||
extern const int INCORRECT_ELEMENT_OF_SET;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int DUPLICATE_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols)
|
||||
@ -459,10 +470,18 @@ public:
|
||||
};
|
||||
|
||||
ActionsMatcher::Data::Data(
|
||||
ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_,
|
||||
const NamesAndTypesList & source_columns_, ActionsDAGPtr actions_dag,
|
||||
PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_,
|
||||
bool no_subqueries_, bool no_makeset_, bool only_consts_, bool create_source_for_in_)
|
||||
ContextPtr context_,
|
||||
SizeLimits set_size_limit_,
|
||||
size_t subquery_depth_,
|
||||
std::reference_wrapper<const NamesAndTypesList> source_columns_,
|
||||
ActionsDAGPtr actions_dag,
|
||||
PreparedSets & prepared_sets_,
|
||||
SubqueriesForSets & subqueries_for_sets_,
|
||||
bool no_subqueries_,
|
||||
bool no_makeset_,
|
||||
bool only_consts_,
|
||||
bool create_source_for_in_,
|
||||
AggregationKeysInfo aggregation_keys_info_)
|
||||
: WithContext(context_)
|
||||
, set_size_limit(set_size_limit_)
|
||||
, subquery_depth(subquery_depth_)
|
||||
@ -475,6 +494,7 @@ ActionsMatcher::Data::Data(
|
||||
, create_source_for_in(create_source_for_in_)
|
||||
, visit_depth(0)
|
||||
, actions_stack(std::move(actions_dag), context_)
|
||||
, aggregation_keys_info(aggregation_keys_info_)
|
||||
, next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1)
|
||||
{
|
||||
}
|
||||
@ -817,6 +837,55 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
return;
|
||||
}
|
||||
|
||||
if (node.name == "grouping")
|
||||
{
|
||||
if (data.only_consts)
|
||||
return; // Cannot perform constant folding, because this function can be executed only after GROUP BY
|
||||
|
||||
size_t arguments_size = node.arguments->children.size();
|
||||
if (arguments_size == 0)
|
||||
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument");
|
||||
if (arguments_size > 64)
|
||||
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size);
|
||||
auto keys_info = data.aggregation_keys_info;
|
||||
auto aggregation_keys_number = keys_info.aggregation_keys.size();
|
||||
|
||||
ColumnNumbers arguments_indexes;
|
||||
for (auto const & arg : node.arguments->children)
|
||||
{
|
||||
size_t pos = keys_info.aggregation_keys.getPosByName(arg->getColumnName());
|
||||
if (pos == aggregation_keys_number)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of GROUPING function {} is not a part of GROUP BY clause", arg->getColumnName());
|
||||
arguments_indexes.push_back(pos);
|
||||
}
|
||||
|
||||
switch (keys_info.group_by_kind)
|
||||
{
|
||||
case GroupByKind::GROUPING_SETS:
|
||||
{
|
||||
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForGroupingSets>(std::move(arguments_indexes), keys_info.grouping_set_keys)), { "__grouping_set" }, column_name);
|
||||
break;
|
||||
}
|
||||
case GroupByKind::ROLLUP:
|
||||
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForRollup>(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name);
|
||||
break;
|
||||
case GroupByKind::CUBE:
|
||||
{
|
||||
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForCube>(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name);
|
||||
break;
|
||||
}
|
||||
case GroupByKind::ORDINARY:
|
||||
{
|
||||
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingOrdinary>(std::move(arguments_indexes))), {}, column_name);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Unexpected kind of GROUP BY clause for GROUPING function: {}", keys_info.group_by_kind);
|
||||
}
|
||||
return;
|
||||
}
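The branch above resolves every GROUPING argument to its position among the GROUP BY keys, rejects anything that is not a key, and then picks the function variant by GroupByKind. A self-contained sketch of just the resolution step, using plain standard-library types instead of the ClickHouse AST (names here are illustrative):

#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> aggregation_keys = {"a", "b", "c"}; // GROUP BY a, b, c
    std::vector<std::string> grouping_args = {"c", "a"};         // GROUPING(c, a)

    std::vector<size_t> arguments_indexes;
    for (const auto & arg : grouping_args)
    {
        size_t pos = 0;
        while (pos < aggregation_keys.size() && aggregation_keys[pos] != arg)
            ++pos;
        if (pos == aggregation_keys.size())
        {
            std::printf("'%s' is not a part of the GROUP BY clause\n", arg.c_str());
            return 1;
        }
        arguments_indexes.push_back(pos);
    }

    for (size_t pos : arguments_indexes)
        std::printf("%zu ", pos); // prints "2 0"
    std::printf("\n");
}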
|
||||
|
||||
SetPtr prepared_set;
|
||||
if (checkFunctionIsInOrGlobalInOperator(node))
|
||||
{
|
||||
|
@ -1,10 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Interpreters/SubqueryForSet.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -76,6 +78,42 @@ class ASTIdentifier;
|
||||
class ASTFunction;
|
||||
class ASTLiteral;
|
||||
|
||||
enum class GroupByKind
|
||||
{
|
||||
NONE,
|
||||
ORDINARY,
|
||||
ROLLUP,
|
||||
CUBE,
|
||||
GROUPING_SETS,
|
||||
};
|
||||
|
||||
/*
|
||||
* This class stores information about aggregation keys used in GROUP BY clause.
|
||||
* It's used for providing information about aggregation to GROUPING function
|
||||
* implementation.
|
||||
*/
|
||||
struct AggregationKeysInfo
|
||||
{
|
||||
AggregationKeysInfo(
|
||||
std::reference_wrapper<const NamesAndTypesList> aggregation_keys_,
|
||||
std::reference_wrapper<const ColumnNumbersList> grouping_set_keys_,
|
||||
GroupByKind group_by_kind_)
|
||||
: aggregation_keys(aggregation_keys_)
|
||||
, grouping_set_keys(grouping_set_keys_)
|
||||
, group_by_kind(group_by_kind_)
|
||||
{}
|
||||
|
||||
AggregationKeysInfo(const AggregationKeysInfo &) = default;
|
||||
AggregationKeysInfo(AggregationKeysInfo &&) = default;
|
||||
|
||||
// Names and types of all used keys
|
||||
const NamesAndTypesList & aggregation_keys;
|
||||
// Indexes of aggregation keys used in each grouping set (only for GROUP BY GROUPING SETS)
|
||||
const ColumnNumbersList & grouping_set_keys;
|
||||
|
||||
GroupByKind group_by_kind;
|
||||
};
|
||||
|
||||
/// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too.
|
||||
class ActionsMatcher
|
||||
{
|
||||
@ -95,6 +133,7 @@ public:
|
||||
bool create_source_for_in;
|
||||
size_t visit_depth;
|
||||
ScopeStack actions_stack;
|
||||
AggregationKeysInfo aggregation_keys_info;
|
||||
|
||||
/*
|
||||
* Remember the last unique column suffix to avoid quadratic behavior
|
||||
@ -107,14 +146,15 @@ public:
|
||||
ContextPtr context_,
|
||||
SizeLimits set_size_limit_,
|
||||
size_t subquery_depth_,
|
||||
const NamesAndTypesList & source_columns_,
|
||||
std::reference_wrapper<const NamesAndTypesList> source_columns_,
|
||||
ActionsDAGPtr actions_dag,
|
||||
PreparedSets & prepared_sets_,
|
||||
SubqueriesForSets & subqueries_for_sets_,
|
||||
bool no_subqueries_,
|
||||
bool no_makeset_,
|
||||
bool only_consts_,
|
||||
bool create_source_for_in_);
|
||||
bool create_source_for_in_,
|
||||
AggregationKeysInfo aggregation_keys_info_);
|
||||
|
||||
/// Does result of the calculation already exists in the block.
|
||||
bool hasColumn(const String & column_name) const;
|
||||
|
@ -647,7 +647,7 @@ std::unique_lock<std::shared_mutex> DatabaseCatalog::getExclusiveDDLGuardForData
|
||||
{
|
||||
std::unique_lock lock(ddl_guards_mutex);
|
||||
db_guard_iter = ddl_guards.try_emplace(database).first;
|
||||
assert(db_guard_iter->second.first.count(""));
|
||||
assert(db_guard_iter->second.first.contains(""));
|
||||
}
|
||||
DatabaseGuard & db_guard = db_guard_iter->second;
|
||||
return std::unique_lock{db_guard.second};
|
||||
|
@ -43,10 +43,13 @@
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
|
||||
#include <Interpreters/ActionsVisitor.h>
|
||||
#include <Interpreters/GetAggregatesVisitor.h>
|
||||
@ -325,12 +328,21 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
{
|
||||
if (ASTPtr group_by_ast = select_query->groupBy())
|
||||
{
|
||||
NameSet unique_keys;
|
||||
NameToIndexMap unique_keys;
|
||||
ASTs & group_asts = group_by_ast->children;
|
||||
|
||||
if (select_query->group_by_with_rollup)
|
||||
group_by_kind = GroupByKind::ROLLUP;
|
||||
else if (select_query->group_by_with_cube)
|
||||
group_by_kind = GroupByKind::CUBE;
|
||||
else if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
|
||||
group_by_kind = GroupByKind::GROUPING_SETS;
|
||||
else
|
||||
group_by_kind = GroupByKind::ORDINARY;
|
||||
|
||||
/// For GROUPING SETS with multiple groups we always add virtual __grouping_set column
|
||||
/// With set number, which is used as an additional key at the stage of merging aggregating data.
|
||||
if (select_query->group_by_with_grouping_sets && group_asts.size() > 1)
|
||||
if (group_by_kind != GroupByKind::ORDINARY)
|
||||
aggregated_columns.emplace_back("__grouping_set", std::make_shared<DataTypeUInt64>());
|
||||
|
||||
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
|
||||
@ -347,6 +359,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
group_elements_ast = group_ast_element->children;
|
||||
|
||||
NamesAndTypesList grouping_set_list;
|
||||
ColumnNumbers grouping_set_indexes_list;
|
||||
|
||||
for (ssize_t j = 0; j < ssize_t(group_elements_ast.size()); ++j)
|
||||
{
|
||||
@ -387,15 +400,21 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
/// Aggregation keys are unique.
|
||||
if (!unique_keys.contains(key.name))
|
||||
{
|
||||
unique_keys.insert(key.name);
|
||||
unique_keys[key.name] = aggregation_keys.size();
|
||||
grouping_set_indexes_list.push_back(aggregation_keys.size());
|
||||
aggregation_keys.push_back(key);
|
||||
|
||||
/// Key is no longer needed, therefore we can save a little by moving it.
|
||||
aggregated_columns.push_back(std::move(key));
|
||||
}
|
||||
else
|
||||
{
|
||||
grouping_set_indexes_list.push_back(unique_keys[key.name]);
|
||||
}
|
||||
}
|
||||
|
||||
aggregation_keys_list.push_back(std::move(grouping_set_list));
|
||||
aggregation_keys_indexes_list.push_back(std::move(grouping_set_indexes_list));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -433,7 +452,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
/// Aggregation keys are uniqued.
|
||||
if (!unique_keys.contains(key.name))
|
||||
{
|
||||
unique_keys.insert(key.name);
|
||||
unique_keys[key.name] = aggregation_keys.size();
|
||||
aggregation_keys.push_back(key);
|
||||
|
||||
/// Key is no longer needed, therefore we can save a little by moving it.
|
||||
@ -442,6 +461,13 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
}
|
||||
}
|
||||
|
||||
if (!select_query->group_by_with_grouping_sets)
|
||||
{
|
||||
auto & list = aggregation_keys_indexes_list.emplace_back();
|
||||
for (size_t i = 0; i < aggregation_keys.size(); ++i)
|
||||
list.push_back(i);
|
||||
}
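The switch from NameSet to NameToIndexMap above lets a key that repeats across grouping sets reuse its existing position in aggregation_keys instead of being skipped. A standalone sketch of that bookkeeping with plain standard-library types (illustrative names only, not the real NamesAndTypesList machinery):

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
    // GROUP BY GROUPING SETS ((a, b), (a), (b))
    std::vector<std::vector<std::string>> grouping_sets = {{"a", "b"}, {"a"}, {"b"}};

    std::unordered_map<std::string, size_t> unique_keys;
    std::vector<std::string> aggregation_keys;
    std::vector<std::vector<size_t>> aggregation_keys_indexes_list;

    for (const auto & set : grouping_sets)
    {
        std::vector<size_t> indexes;
        for (const auto & key : set)
        {
            auto it = unique_keys.find(key);
            if (it == unique_keys.end())
            {
                it = unique_keys.emplace(key, aggregation_keys.size()).first;
                aggregation_keys.push_back(key);
            }
            indexes.push_back(it->second);
        }
        aggregation_keys_indexes_list.push_back(std::move(indexes));
    }

    // aggregation_keys ends up as {a, b}; the index lists are {0, 1}, {0}, {1}.
    for (const auto & indexes : aggregation_keys_indexes_list)
    {
        for (size_t i : indexes)
            std::printf("%zu ", i);
        std::printf("\n");
    }
}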
|
||||
|
||||
if (group_asts.empty())
|
||||
{
|
||||
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
|
||||
@ -583,7 +609,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
!isRemoteStorage() /* create_source_for_in */,
|
||||
getAggregationKeysInfo());
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
@ -603,7 +630,8 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP
|
||||
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
|
||||
true /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
!isRemoteStorage() /* create_source_for_in */,
|
||||
getAggregationKeysInfo());
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
@ -624,7 +652,8 @@ void ExpressionAnalyzer::getRootActionsForHaving(
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
true /* create_source_for_in */);
|
||||
true /* create_source_for_in */,
|
||||
getAggregationKeysInfo());
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Columns/FilterDescription.h>
|
||||
#include <Interpreters/ActionsVisitor.h>
|
||||
#include <Interpreters/AggregateDescription.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/SubqueryForSet.h>
|
||||
@ -67,6 +69,7 @@ struct ExpressionAnalyzerData
|
||||
bool has_aggregation = false;
|
||||
NamesAndTypesList aggregation_keys;
|
||||
NamesAndTypesLists aggregation_keys_list;
|
||||
ColumnNumbersList aggregation_keys_indexes_list;
|
||||
bool has_const_aggregation_keys = false;
|
||||
AggregateDescriptions aggregate_descriptions;
|
||||
|
||||
@ -77,6 +80,8 @@ struct ExpressionAnalyzerData
|
||||
|
||||
/// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
|
||||
TemporaryTablesMapping external_tables;
|
||||
|
||||
GroupByKind group_by_kind = GroupByKind::NONE;
|
||||
};
|
||||
|
||||
|
||||
@ -200,6 +205,11 @@ protected:
|
||||
|
||||
NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns);
|
||||
NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns);
|
||||
|
||||
AggregationKeysInfo getAggregationKeysInfo() const noexcept
|
||||
{
|
||||
return { aggregation_keys, aggregation_keys_indexes_list, group_by_kind };
|
||||
}
|
||||
};
|
||||
|
||||
class SelectQueryExpressionAnalyzer;
|
||||
@ -326,7 +336,15 @@ public:
|
||||
bool hasGlobalSubqueries() { return has_global_subqueries; }
|
||||
bool hasTableJoin() const { return syntax->ast_join; }
|
||||
|
||||
bool useGroupingSetKey() const { return aggregation_keys_list.size() > 1; }
|
||||
/// When there is only one group in GROUPING SETS
|
||||
/// it is a special case that is equal to GROUP BY, i.e.:
|
||||
///
|
||||
/// GROUPING SETS ((a, b)) -> GROUP BY a, b
|
||||
///
|
||||
/// But it is rewritten by GroupingSetsRewriterVisitor to GROUP BY,
|
||||
/// so instead of aggregation_keys_list.size() > 1,
|
||||
/// !aggregation_keys_list.empty() can be used.
|
||||
bool useGroupingSetKey() const { return !aggregation_keys_list.empty(); }
|
||||
|
||||
const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
|
||||
bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }
|
||||
|
22
src/Interpreters/GroupingSetsRewriterVisitor.cpp
Normal file
@ -0,0 +1,22 @@
|
||||
#include <Interpreters/GroupingSetsRewriterVisitor.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void GroupingSetsRewriterData::visit(ASTSelectQuery & select_query, ASTPtr &)
|
||||
{
|
||||
const ASTPtr group_by = select_query.groupBy();
|
||||
if (!group_by || !select_query.group_by_with_grouping_sets)
|
||||
return;
|
||||
|
||||
if (group_by->children.size() != 1)
|
||||
return;
|
||||
|
||||
select_query.setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_by->children.front()));
|
||||
select_query.group_by_with_grouping_sets = false;
|
||||
}
|
||||
|
||||
}
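The whole rewrite fits in the function above: a GROUPING SETS clause with exactly one group is semantically plain GROUP BY, so the single child expression list is promoted and the flag is cleared. A toy model of the same decision, with a struct standing in for ASTSelectQuery (purely illustrative):

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

/// Toy stand-in for ASTSelectQuery: with GROUPING SETS each GROUP BY child is
/// itself one grouping set, e.g. {{"a", "b"}} for GROUPING SETS ((a, b)).
struct SelectQuery
{
    bool group_by_with_grouping_sets = false;
    std::vector<std::vector<std::string>> group_by_children;
};

static void rewriteSingleGroupingSet(SelectQuery & query)
{
    if (!query.group_by_with_grouping_sets || query.group_by_children.size() != 1)
        return; /// no GROUPING SETS, or more than one group: leave the query alone

    /// The only group becomes the plain GROUP BY key list.
    std::vector<std::string> keys = std::move(query.group_by_children.front());
    query.group_by_children.clear();
    for (auto & key : keys)
        query.group_by_children.push_back({std::move(key)});
    query.group_by_with_grouping_sets = false;
}

int main()
{
    SelectQuery q;
    q.group_by_with_grouping_sets = true;
    q.group_by_children = {{"a", "b"}}; /// GROUPING SETS ((a, b))

    rewriteSingleGroupingSet(q);
    std::printf("grouping sets: %d, GROUP BY keys: %zu\n",
                q.group_by_with_grouping_sets ? 1 : 0, q.group_by_children.size());
    /// Prints "grouping sets: 0, GROUP BY keys: 2", i.e. GROUP BY a, b.
}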
|
30
src/Interpreters/GroupingSetsRewriterVisitor.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTSelectQuery;
|
||||
|
||||
/// Rewrite GROUPING SETS with only one group, to GROUP BY.
|
||||
///
|
||||
/// Examples:
|
||||
/// - GROUPING SETS (foo) -> GROUP BY foo
|
||||
/// - GROUPING SETS ((foo, bar)) -> GROUP BY foo, bar
|
||||
///
|
||||
/// But not the following:
|
||||
/// - GROUPING SETS (foo, bar) (since it has two groups (foo) and (bar))
|
||||
class GroupingSetsRewriterData
|
||||
{
|
||||
public:
|
||||
using TypeToVisit = ASTSelectQuery;
|
||||
|
||||
static void visit(ASTSelectQuery & select_query, ASTPtr &);
|
||||
};
|
||||
|
||||
using GroupingSetsRewriterMatcher = OneTypeMatcher<GroupingSetsRewriterData>;
|
||||
using GroupingSetsRewriterVisitor = InDepthNodeVisitor<GroupingSetsRewriterMatcher, true>;
|
||||
|
||||
}
|
@ -538,6 +538,7 @@ void HashJoin::dataMapInit(MapsVariant & map)
|
||||
|
||||
bool HashJoin::overDictionary() const
|
||||
{
|
||||
assert(data->type != Type::DICT || table_join->getDictionaryReader());
|
||||
return data->type == Type::DICT;
|
||||
}
|
||||
|
||||
@ -707,6 +708,13 @@ namespace
|
||||
|
||||
void HashJoin::initRightBlockStructure(Block & saved_block_sample)
|
||||
{
|
||||
if (isCrossOrComma(kind))
|
||||
{
|
||||
/// cross join doesn't have keys, just add all columns
|
||||
saved_block_sample = sample_block_with_columns_to_add.cloneEmpty();
|
||||
return;
|
||||
}
|
||||
|
||||
bool multiple_disjuncts = !table_join->oneDisjunct();
|
||||
/// We could remove key columns for LEFT | INNER HashJoin but we should keep them for JoinSwitcher (if any).
|
||||
bool save_key_columns = !table_join->forceHashJoin() || isRightOrFull(kind) || multiple_disjuncts;
|
||||
@ -724,9 +732,7 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample)
|
||||
for (auto & column : sample_block_with_columns_to_add)
|
||||
{
|
||||
if (!saved_block_sample.findByName(column.name))
|
||||
{
|
||||
saved_block_sample.insert(column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -912,6 +918,7 @@ public:
|
||||
bool is_join_get_)
|
||||
: join_on_keys(join_on_keys_)
|
||||
, rows_to_add(block.rows())
|
||||
, sample_block(saved_block_sample)
|
||||
, is_join_get(is_join_get_)
|
||||
{
|
||||
size_t num_columns_to_add = block_with_columns_to_add.columns();
|
||||
@ -951,12 +958,46 @@ public:
|
||||
return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].type, type_name[i].qualified_name);
|
||||
}
|
||||
|
||||
static void assertBlockEqualsStructureUpToLowCard(const Block & lhs_block, const Block & rhs_block)
|
||||
{
|
||||
if (lhs_block.columns() != rhs_block.columns())
|
||||
throw Exception("Different number of columns in blocks", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
for (size_t i = 0; i < lhs_block.columns(); ++i)
|
||||
{
|
||||
const auto & lhs = lhs_block.getByPosition(i);
|
||||
const auto & rhs = rhs_block.getByPosition(i);
|
||||
if (lhs.name != rhs.name)
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]",
|
||||
lhs_block.dumpStructure(), rhs_block.dumpStructure());
|
||||
|
||||
const auto & ltype = recursiveRemoveLowCardinality(lhs.type);
|
||||
const auto & rtype = recursiveRemoveLowCardinality(rhs.type);
|
||||
if (!ltype->equals(*rtype))
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]",
|
||||
lhs_block.dumpStructure(), rhs_block.dumpStructure());
|
||||
|
||||
const auto & lcol = recursiveRemoveLowCardinality(lhs.column);
|
||||
const auto & rcol = recursiveRemoveLowCardinality(rhs.column);
|
||||
if (lcol->getDataType() != rcol->getDataType())
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]",
|
||||
lhs_block.dumpStructure(), rhs_block.dumpStructure());
|
||||
}
|
||||
}
|
||||
|
||||
template <bool has_defaults>
|
||||
void appendFromBlock(const Block & block, size_t row_num)
|
||||
{
|
||||
if constexpr (has_defaults)
|
||||
applyLazyDefaults();
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Like assertBlocksHaveEqualStructure but doesn't check low cardinality
|
||||
assertBlockEqualsStructureUpToLowCard(sample_block, block);
|
||||
#else
|
||||
UNUSED(assertBlockEqualsStructureUpToLowCard);
|
||||
#endif
|
||||
|
||||
if (is_join_get)
|
||||
{
|
||||
/// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
|
||||
@ -1019,6 +1060,7 @@ private:
|
||||
size_t lazy_defaults_count = 0;
|
||||
/// for ASOF
|
||||
const IColumn * left_asof_key = nullptr;
|
||||
Block sample_block;
|
||||
|
||||
bool is_join_get;
|
||||
|
||||
@ -1698,7 +1740,7 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types,
|
||||
throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
|
||||
|
||||
auto elem = sample_block_with_columns_to_add.getByName(column_name);
|
||||
if (or_null)
|
||||
if (or_null && JoinCommon::canBecomeNullable(elem.type))
|
||||
elem.type = makeNullable(elem.type);
|
||||
return elem.type;
|
||||
}
|
||||
|
@ -12,8 +12,13 @@ class IInterpreterUnionOrSelectQuery : public IInterpreter
|
||||
{
|
||||
public:
|
||||
IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, ContextPtr context_, const SelectQueryOptions & options_)
|
||||
: IInterpreterUnionOrSelectQuery(query_ptr_, Context::createCopy(context_), options_)
|
||||
{
|
||||
}
|
||||
|
||||
IInterpreterUnionOrSelectQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_, const SelectQueryOptions & options_)
|
||||
: query_ptr(query_ptr_)
|
||||
, context(Context::createCopy(context_))
|
||||
, context(context_)
|
||||
, options(options_)
|
||||
, max_streams(context->getSettingsRef().max_threads)
|
||||
{
|
||||
@ -60,4 +65,3 @@ protected:
|
||||
bool uses_view_source = false;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -51,6 +51,14 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextPtr context_,
|
||||
const SelectQueryOptions & options_)
|
||||
:InterpreterSelectIntersectExceptQuery(query_ptr_, Context::createCopy(context_), options_)
|
||||
{
|
||||
}
|
||||
|
||||
InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
const SelectQueryOptions & options_)
|
||||
: IInterpreterUnionOrSelectQuery(query_ptr_->clone(), context_, options_)
|
||||
{
|
||||
ASTSelectIntersectExceptQuery * ast = query_ptr->as<ASTSelectIntersectExceptQuery>();
|
||||
|
@ -24,6 +24,11 @@ public:
|
||||
ContextPtr context_,
|
||||
const SelectQueryOptions & options_);
|
||||
|
||||
InterpreterSelectIntersectExceptQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
const SelectQueryOptions & options_);
|
||||
|
||||
BlockIO execute() override;
|
||||
|
||||
Block getSampleBlock() { return result_header; }
|
||||
|
@ -165,6 +165,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, required_result_column_names_)
|
||||
{}
|
||||
|
||||
InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
const SelectQueryOptions & options_,
|
||||
const Names & required_result_column_names_)
|
||||
: InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, required_result_column_names_)
|
||||
{}
|
||||
|
||||
InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextPtr context_,
|
||||
@ -280,6 +288,28 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
SubqueriesForSets subquery_for_sets_,
|
||||
PreparedSets prepared_sets_)
|
||||
: InterpreterSelectQuery(
|
||||
query_ptr_,
|
||||
Context::createCopy(context_),
|
||||
std::move(input_pipe_),
|
||||
storage_,
|
||||
options_,
|
||||
required_result_column_names,
|
||||
metadata_snapshot_,
|
||||
std::move(subquery_for_sets_),
|
||||
std::move(prepared_sets_))
|
||||
{}
|
||||
|
||||
InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
std::optional<Pipe> input_pipe_,
|
||||
const StoragePtr & storage_,
|
||||
const SelectQueryOptions & options_,
|
||||
const Names & required_result_column_names,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
SubqueriesForSets subquery_for_sets_,
|
||||
PreparedSets prepared_sets_)
|
||||
/// NOTE: the query almost always should be cloned because it will be modified during analysis.
|
||||
: IInterpreterUnionOrSelectQuery(options_.modify_inplace ? query_ptr_ : query_ptr_->clone(), context_, options_)
|
||||
, storage(storage_)
|
||||
@ -1098,6 +1128,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
if (query.group_by_with_grouping_sets && (query.group_by_with_rollup || query.group_by_with_cube))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GROUPING SETS are not supported together with ROLLUP and CUBE");
|
||||
|
||||
if (expressions.hasHaving() && query.group_by_with_totals && (query.group_by_with_rollup || query.group_by_with_cube))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING");
|
||||
|
||||
if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
|
||||
{
|
||||
query_info.projection->aggregate_overflow_row = aggregate_overflow_row;
|
||||
@ -1386,11 +1419,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
executeRollupOrCube(query_plan, Modificator::CUBE);
|
||||
|
||||
if ((query.group_by_with_rollup || query.group_by_with_cube || query.group_by_with_grouping_sets) && expressions.hasHaving())
|
||||
{
|
||||
if (query.group_by_with_totals)
|
||||
throw Exception("WITH TOTALS and WITH ROLLUP or CUBE or GROUPING SETS are not supported together in presence of HAVING", ErrorCodes::NOT_IMPLEMENTED);
|
||||
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
|
||||
}
|
||||
}
|
||||
else if (expressions.hasHaving())
|
||||
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
|
||||
@ -2060,12 +2089,15 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
if (prewhere_info)
|
||||
query_info.prewhere_info = prewhere_info;
|
||||
|
||||
bool optimize_read_in_order = analysis_result.optimize_read_in_order;
|
||||
bool optimize_aggregation_in_order = analysis_result.optimize_aggregation_in_order && !query_analyzer->useGroupingSetKey();
|
||||
|
||||
/// Create optimizer with prepared actions.
|
||||
/// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
|
||||
if ((analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order)
|
||||
if ((optimize_read_in_order || optimize_aggregation_in_order)
|
||||
&& (!query_info.projection || query_info.projection->complete))
|
||||
{
|
||||
if (analysis_result.optimize_read_in_order)
|
||||
if (optimize_read_in_order)
|
||||
{
|
||||
if (query_info.projection)
|
||||
{
|
||||
@ -2291,7 +2323,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
|
||||
|
||||
SortDescription group_by_sort_description;
|
||||
|
||||
if (group_by_info && settings.optimize_aggregation_in_order)
|
||||
if (group_by_info && settings.optimize_aggregation_in_order && !query_analyzer->useGroupingSetKey())
|
||||
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
|
||||
else
|
||||
group_by_info = nullptr;
|
||||
|
@ -55,6 +55,12 @@ public:
|
||||
const SelectQueryOptions &,
|
||||
const Names & required_result_column_names_ = Names{});
|
||||
|
||||
InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
const SelectQueryOptions &,
|
||||
const Names & required_result_column_names_ = Names{});
|
||||
|
||||
/// Read data not from the table specified in the query, but from the prepared pipe `input`.
|
||||
InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
@ -133,6 +139,17 @@ private:
|
||||
SubqueriesForSets subquery_for_sets_ = {},
|
||||
PreparedSets prepared_sets_ = {});
|
||||
|
||||
InterpreterSelectQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
std::optional<Pipe> input_pipe,
|
||||
const StoragePtr & storage_,
|
||||
const SelectQueryOptions &,
|
||||
const Names & required_result_column_names = {},
|
||||
const StorageMetadataPtr & metadata_snapshot_ = nullptr,
|
||||
SubqueriesForSets subquery_for_sets_ = {},
|
||||
PreparedSets prepared_sets_ = {});
|
||||
|
||||
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
|
||||
|
||||
void addPrewhereAliasActions();
|
||||
|
@ -31,8 +31,13 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
const ASTPtr & query_ptr_, ContextPtr context_,
|
||||
const SelectQueryOptions & options_, const Names & required_result_column_names)
|
||||
const ASTPtr & query_ptr_, ContextPtr context_, const SelectQueryOptions & options_, const Names & required_result_column_names)
|
||||
: InterpreterSelectWithUnionQuery(query_ptr_, Context::createCopy(context_), options_, required_result_column_names)
|
||||
{
|
||||
}
|
||||
|
||||
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
const ASTPtr & query_ptr_, ContextMutablePtr context_, const SelectQueryOptions & options_, const Names & required_result_column_names)
|
||||
: IInterpreterUnionOrSelectQuery(query_ptr_, context_, options_)
|
||||
{
|
||||
ASTSelectWithUnionQuery * ast = query_ptr->as<ASTSelectWithUnionQuery>();
|
||||
@ -46,6 +51,10 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
|
||||
if (!num_children)
|
||||
throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// This is required for UNION to match headers correctly.
|
||||
if (num_children > 1)
|
||||
options.reorderColumns();
|
||||
|
||||
/// Note that we pass 'required_result_column_names' to first SELECT.
|
||||
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
|
||||
/// because names could be different.
|
||||
|
@ -22,6 +22,12 @@ public:
|
||||
const SelectQueryOptions &,
|
||||
const Names & required_result_column_names = {});
|
||||
|
||||
InterpreterSelectWithUnionQuery(
|
||||
const ASTPtr & query_ptr_,
|
||||
ContextMutablePtr context_,
|
||||
const SelectQueryOptions &,
|
||||
const Names & required_result_column_names = {});
|
||||
|
||||
~InterpreterSelectWithUnionQuery() override;
|
||||
|
||||
/// Builds QueryPlan for current query.
|
||||
|
@ -31,6 +31,8 @@ struct SelectQueryOptions
|
||||
bool only_analyze = false;
|
||||
bool modify_inplace = false;
|
||||
bool remove_duplicates = false;
|
||||
/// This is required for UNION to match headers correctly.
|
||||
bool reorder_columns_as_required_header = false;
|
||||
bool ignore_quota = false;
|
||||
bool ignore_limits = false;
|
||||
/// This flag is needed to analyze query ignoring table projections.
|
||||
@ -97,6 +99,12 @@ struct SelectQueryOptions
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & reorderColumns(bool value = true)
|
||||
{
|
||||
reorder_columns_as_required_header = value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & noSubquery()
|
||||
{
|
||||
subquery_depth = 0;
|
||||
@ -127,7 +135,7 @@ struct SelectQueryOptions
|
||||
return *this;
|
||||
}
|
||||
|
||||
SelectQueryOptions & ignoreASTOptimizationsAlias(bool value = true)
|
||||
SelectQueryOptions & ignoreASTOptimizations(bool value = true)
|
||||
{
|
||||
ignore_ast_optimizations = value;
|
||||
return *this;
|
||||
|
@ -479,6 +479,13 @@ bool TableJoin::tryInitDictJoin(const Block & sample_block, ContextPtr context)
|
||||
src_names.push_back(original);
|
||||
dst_columns.push_back({col.name, col.type});
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Can't extract column from dictionary table
|
||||
/// TODO: Sometimes it should be possible to reconstruct the required column,
|
||||
/// e.g. if it's an expression depending on dictionary attributes
|
||||
return false;
|
||||
}
|
||||
}
|
||||
dictionary_reader = std::make_shared<DictionaryReader>(dict_name, src_names, dst_columns, context);
|
||||
|
||||
|
@ -217,7 +217,7 @@ void TransactionLog::runUpdatingThread()
|
||||
try
|
||||
{
|
||||
/// Do not wait if we have some transactions to finalize
|
||||
if (!unknown_state_list_loaded.empty())
|
||||
if (unknown_state_list_loaded.empty())
|
||||
log_updated_event->wait();
|
||||
|
||||
if (stop_flag.load())
|
||||
|
@ -253,7 +253,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
|
||||
{
|
||||
for (const auto & column : table.columns)
|
||||
{
|
||||
if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.count(column.name)))
|
||||
if (asterisk_regexp_pattern->isColumnMatching(column.name) && (first_table || !data.join_using_columns.contains(column.name)))
|
||||
{
|
||||
addIdentifier(columns, table.table, column.name);
|
||||
}
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Interpreters/FunctionNameNormalizer.h>
|
||||
#include <Interpreters/MarkTableIdentifiersVisitor.h>
|
||||
#include <Interpreters/QueryNormalizer.h>
|
||||
#include <Interpreters/GroupingSetsRewriterVisitor.h>
|
||||
#include <Interpreters/ExecuteScalarSubqueriesVisitor.h>
|
||||
#include <Interpreters/CollectJoinOnKeysVisitor.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
@ -65,6 +66,12 @@ namespace
|
||||
|
||||
using LogAST = DebugASTLog<false>; /// set to true to enable logs
|
||||
|
||||
void optimizeGroupingSets(ASTPtr & query)
|
||||
{
|
||||
GroupingSetsRewriterVisitor::Data data;
|
||||
GroupingSetsRewriterVisitor(data).visit(query);
|
||||
}
|
||||
|
||||
/// Select implementation of a function based on settings.
|
||||
/// Important that it is done as query rewrite. It means rewritten query
|
||||
/// will be sent to remote servers during distributed query execution,
|
||||
@ -422,7 +429,7 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
|
||||
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
|
||||
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
|
||||
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
|
||||
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
|
||||
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups, bool reorder_columns_as_required_header)
|
||||
{
|
||||
ASTs & elements = select_query->select()->children;
|
||||
|
||||
@ -453,6 +460,29 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
|
||||
|
||||
NameSet remove_columns;
|
||||
|
||||
/// Resort columns according to required_result_columns.
|
||||
if (reorder_columns_as_required_header && !required_result_columns.empty())
|
||||
{
|
||||
std::unordered_map<String, size_t> name_pos;
|
||||
{
|
||||
size_t pos = 0;
|
||||
for (const auto & name : required_result_columns)
|
||||
name_pos[name] = pos++;
|
||||
}
|
||||
std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
|
||||
{
|
||||
String lhs_name = lhs->getAliasOrColumnName();
|
||||
String rhs_name = rhs->getAliasOrColumnName();
|
||||
size_t lhs_pos = name_pos.size();
|
||||
size_t rhs_pos = name_pos.size();
|
||||
if (auto it = name_pos.find(lhs_name); it != name_pos.end())
|
||||
lhs_pos = it->second;
|
||||
if (auto it = name_pos.find(rhs_name); it != name_pos.end())
|
||||
rhs_pos = it->second;
|
||||
return lhs_pos < rhs_pos;
|
||||
});
|
||||
}
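The reorder above sorts the SELECT expressions by their position in required_result_columns, pushing everything that is not in the list to the back; per the comment on reorder_columns_as_required_header, this is what lets UNION match headers by position. A standalone sketch of the same comparator over plain strings (illustrative data):

#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
    std::vector<std::string> required_result_columns = {"b", "a"};
    std::vector<std::string> elements = {"a", "x", "b"}; /// SELECT a, x, b

    std::unordered_map<std::string, size_t> name_pos;
    size_t pos = 0;
    for (const auto & name : required_result_columns)
        name_pos[name] = pos++;

    /// Required columns first, in the required order; unknown names go last.
    std::sort(elements.begin(), elements.end(), [&](const auto & lhs, const auto & rhs)
    {
        size_t lhs_pos = name_pos.size();
        size_t rhs_pos = name_pos.size();
        if (auto it = name_pos.find(lhs); it != name_pos.end())
            lhs_pos = it->second;
        if (auto it = name_pos.find(rhs); it != name_pos.end())
            rhs_pos = it->second;
        return lhs_pos < rhs_pos;
    });

    for (const auto & name : elements)
        std::printf("%s ", name.c_str()); /// prints "b a x"
    std::printf("\n");
}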
|
||||
|
||||
for (const auto & elem : elements)
|
||||
{
|
||||
String name = elem->getAliasOrColumnName();
|
||||
@ -465,6 +495,8 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
|
||||
}
|
||||
else if (select_query->distinct || hasArrayJoin(elem))
|
||||
{
|
||||
/// ARRAY JOIN cannot be optimized out since it may change number of rows,
|
||||
/// so as DISTINCT.
|
||||
new_elements.push_back(elem);
|
||||
}
|
||||
else
|
||||
@ -1135,6 +1167,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
|
||||
size_t subquery_depth = select_options.subquery_depth;
|
||||
bool remove_duplicates = select_options.remove_duplicates;
|
||||
bool reorder_columns_as_required_header = select_options.reorder_columns_as_required_header;
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
|
||||
@ -1151,7 +1184,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
if (remove_duplicates)
|
||||
renameDuplicatedColumns(select_query);
|
||||
|
||||
/// Perform it before analyzing JOINs, because it may change number of columns with names unique and break some login inside JOINs
|
||||
/// Perform it before analyzing JOINs, because it may change number of columns with names unique and break some logic inside JOINs
|
||||
if (settings.optimize_normalize_count_variants)
|
||||
TreeOptimizer::optimizeCountConstantAndSumOne(query);
|
||||
|
||||
@ -1186,7 +1219,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
||||
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
|
||||
/// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
|
||||
removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates, reorder_columns_as_required_header);
|
||||
|
||||
/// Executing scalar subqueries - replacing them with constant values.
|
||||
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze);
|
||||
@ -1373,6 +1406,8 @@ void TreeRewriter::normalize(
|
||||
/// Common subexpression elimination. Rewrite rules.
|
||||
QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases);
|
||||
QueryNormalizer(normalizer_data).visit(query);
|
||||
|
||||
optimizeGroupingSets(query);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -95,22 +95,22 @@ public:
|
||||
ASTPtr & refWhere() { return getExpression(Expression::WHERE); }
|
||||
ASTPtr & refHaving() { return getExpression(Expression::HAVING); }
|
||||
|
||||
const ASTPtr with() const { return getExpression(Expression::WITH); }
|
||||
const ASTPtr select() const { return getExpression(Expression::SELECT); }
|
||||
const ASTPtr tables() const { return getExpression(Expression::TABLES); }
|
||||
const ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); }
|
||||
const ASTPtr where() const { return getExpression(Expression::WHERE); }
|
||||
const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); }
|
||||
const ASTPtr having() const { return getExpression(Expression::HAVING); }
|
||||
const ASTPtr window() const { return getExpression(Expression::WINDOW); }
|
||||
const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); }
|
||||
const ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); }
|
||||
const ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); }
|
||||
const ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); }
|
||||
const ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); }
|
||||
const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
|
||||
const ASTPtr settings() const { return getExpression(Expression::SETTINGS); }
|
||||
const ASTPtr interpolate() const { return getExpression(Expression::INTERPOLATE); }
|
||||
ASTPtr with() const { return getExpression(Expression::WITH); }
|
||||
ASTPtr select() const { return getExpression(Expression::SELECT); }
|
||||
ASTPtr tables() const { return getExpression(Expression::TABLES); }
|
||||
ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); }
|
||||
ASTPtr where() const { return getExpression(Expression::WHERE); }
|
||||
ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); }
|
||||
ASTPtr having() const { return getExpression(Expression::HAVING); }
|
||||
ASTPtr window() const { return getExpression(Expression::WINDOW); }
|
||||
ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); }
|
||||
ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); }
|
||||
ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); }
|
||||
ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); }
|
||||
ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); }
|
||||
ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
|
||||
ASTPtr settings() const { return getExpression(Expression::SETTINGS); }
|
||||
ASTPtr interpolate() const { return getExpression(Expression::INTERPOLATE); }
|
||||
|
||||
bool hasFiltration() const { return where() || prewhere() || having(); }
|
||||
|
||||
|
@ -803,6 +803,20 @@ namespace
|
||||
node = makeASTFunction("exists", subquery);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parseGrouping(IParser::Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ASTPtr expr_list;
|
||||
if (!ParserExpressionList(false, false).parse(pos, expr_list, expected))
|
||||
return false;
|
||||
|
||||
auto res = std::make_shared<ASTFunction>();
|
||||
res->name = "grouping";
|
||||
res->arguments = expr_list;
|
||||
res->children.push_back(res->arguments);
|
||||
node = std::move(res);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -888,6 +902,8 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff"
|
||||
|| function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff")
|
||||
parsed_special_function = parseDateDiff(pos, node, expected);
|
||||
else if (function_name_lowercase == "grouping")
|
||||
parsed_special_function = parseGrouping(pos, node, expected);
|
||||
|
||||
if (parsed_special_function.has_value())
|
||||
return parsed_special_function.value() && ParserToken(TokenType::ClosingRoundBracket).ignore(pos);
|
||||
|
@ -38,7 +38,7 @@ PollingQueue::~PollingQueue()
|
||||
void PollingQueue::addTask(size_t thread_number, void * data, int fd)
|
||||
{
|
||||
std::uintptr_t key = reinterpret_cast<uintptr_t>(data);
|
||||
if (tasks.count(key))
|
||||
if (tasks.contains(key))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Task {} was already added to task queue", key);
|
||||
|
||||
tasks[key] = TaskData{thread_number, data, fd};
|
||||
|
@ -18,6 +18,7 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Param
|
||||
header,
|
||||
in_,
|
||||
params_,
|
||||
true,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -41,7 +41,15 @@ CSVRowInputFormat::CSVRowInputFormat(
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
|
||||
: RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::move(format_reader_))
|
||||
: RowInputFormatWithNamesAndTypes(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
std::move(format_reader_))
|
||||
{
|
||||
const String bad_delimiters = " \t\"'.UL";
|
||||
if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos)
|
||||
@ -259,6 +267,12 @@ bool CSVFormatReader::readField(
|
||||
}
|
||||
}
|
||||
|
||||
void CSVFormatReader::skipPrefixBeforeHeader()
|
||||
{
|
||||
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)
|
||||
readRow();
|
||||
}
|
||||
|
||||
|
||||
CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_)
|
||||
: FormatWithNamesAndTypesSchemaReader(
|
||||
|
@ -58,6 +58,7 @@ public:
|
||||
void skipTypes() override { skipHeaderRow(); }
|
||||
void skipFieldDelimiter() override;
|
||||
void skipRowEndDelimiter() override;
|
||||
void skipPrefixBeforeHeader() override;
|
||||
|
||||
std::vector<String> readNames() override { return readHeaderRow(); }
|
||||
std::vector<String> readTypes() override { return readHeaderRow(); }
|
||||
|
@ -47,6 +47,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
|
||||
header_,
|
||||
*buf_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -28,6 +28,7 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/IColumn.h>
|
||||
@ -107,17 +108,25 @@ void PrometheusTextOutputFormat::fixupBucketLabels(CurrentMetric & metric)
|
||||
{
|
||||
String bucket_label = metric.type == "histogram" ? "le" : "quantile";
|
||||
|
||||
std::sort(metric.values.begin(), metric.values.end(),
|
||||
::sort(metric.values.begin(), metric.values.end(),
|
||||
[&bucket_label](const auto & lhs, const auto & rhs)
|
||||
{
|
||||
bool lhs_labels_contain_sum = lhs.labels.contains("sum");
|
||||
bool lhs_labels_contain_count = lhs.labels.contains("count");
|
||||
bool lhs_labels_contain_sum_or_count = lhs_labels_contain_sum || lhs_labels_contain_count;
|
||||
|
||||
bool rhs_labels_contain_sum = rhs.labels.contains("sum");
|
||||
bool rhs_labels_contain_count = rhs.labels.contains("count");
|
||||
bool rhs_labels_contain_sum_or_count = rhs_labels_contain_sum || rhs_labels_contain_count;
|
||||
|
||||
/// rows with labels at the beginning and then `_sum` and `_count`
|
||||
if (lhs.labels.contains("sum") && rhs.labels.contains("count"))
|
||||
if (lhs_labels_contain_sum && rhs_labels_contain_count)
|
||||
return true;
|
||||
if (lhs.labels.contains("count") && rhs.labels.contains("sum"))
|
||||
else if (lhs_labels_contain_count && rhs_labels_contain_sum)
|
||||
return false;
|
||||
if (rhs.labels.contains("sum") || rhs.labels.contains("count"))
|
||||
else if (rhs_labels_contain_sum_or_count && !lhs_labels_contain_sum_or_count)
|
||||
return true;
|
||||
if (lhs.labels.contains("sum") || lhs.labels.contains("count"))
|
||||
else if (lhs_labels_contain_sum_or_count && !rhs_labels_contain_sum_or_count)
|
||||
return false;
|
||||
|
||||
auto lit = lhs.labels.find(bucket_label);
|
||||
|
@ -40,7 +40,15 @@ TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(
|
||||
bool with_types_,
|
||||
bool is_raw_,
|
||||
const FormatSettings & format_settings_)
|
||||
: RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique<TabSeparatedFormatReader>(in_, format_settings_, is_raw_))
|
||||
: RowInputFormatWithNamesAndTypes(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
std::make_unique<TabSeparatedFormatReader>(in_, format_settings_, is_raw_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -230,6 +238,12 @@ void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type)
|
||||
}
|
||||
}
|
||||
|
||||
void TabSeparatedFormatReader::skipPrefixBeforeHeader()
|
||||
{
|
||||
for (size_t i = 0; i != format_settings.tsv.skip_first_lines; ++i)
|
||||
readRow();
|
||||
}
|
||||
|
||||
void TabSeparatedRowInputFormat::syncAfterError()
|
||||
{
|
||||
skipToUnescapedNextLineOrEOF(*in);
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
void skipTypes() override { skipHeaderRow(); }
|
||||
void skipFieldDelimiter() override;
|
||||
void skipRowEndDelimiter() override;
|
||||
void skipPrefixBeforeHeader() override;
|
||||
|
||||
std::vector<String> readRow();
|
||||
std::vector<String> readNames() override { return readRow(); }
|
||||
|
@ -17,6 +17,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
|
||||
const Block & header_,
|
||||
ReadBuffer & in_,
|
||||
const Params & params_,
|
||||
bool is_binary_,
|
||||
bool with_names_,
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
@ -24,6 +25,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
|
||||
: RowInputFormatWithDiagnosticInfo(header_, in_, params_)
|
||||
, format_settings(format_settings_)
|
||||
, data_types(header_.getDataTypes())
|
||||
, is_binary(is_binary_)
|
||||
, with_names(with_names_)
|
||||
, with_types(with_types_)
|
||||
, format_reader(std::move(format_reader_))
|
||||
@ -38,10 +40,11 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
|
||||
if (getCurrentUnitNumber() != 0)
|
||||
return;
|
||||
|
||||
if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8())
|
||||
/// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*).
|
||||
/// Also, we assume that column name or type cannot contain BOM, so, if format has header,
|
||||
/// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
|
||||
if (!is_binary && (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()))
|
||||
{
|
||||
/// We assume that column name or type cannot contain BOM, so, if format has header,
|
||||
/// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
|
||||
skipBOMIfExists(*in);
|
||||
}
|
||||
|
||||
|
@ -24,13 +24,15 @@ class FormatWithNamesAndTypesReader;
|
||||
class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo
|
||||
{
|
||||
protected:
|
||||
/** with_names - in the first line the header with column names
|
||||
/** is_binary - it is a binary format (e.g. don't search for BOM)
|
||||
* with_names - in the first line the header with column names
|
||||
* with_types - in the second line the header with column names
|
||||
*/
|
||||
RowInputFormatWithNamesAndTypes(
|
||||
const Block & header_,
|
||||
ReadBuffer & in_,
|
||||
const Params & params_,
|
||||
bool is_binary_,
|
||||
bool with_names_,
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
@ -51,6 +53,7 @@ private:
|
||||
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
|
||||
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
|
||||
|
||||
bool is_binary;
|
||||
bool with_names;
|
||||
bool with_types;
|
||||
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader;
|
||||
|
Some files were not shown because too many files have changed in this diff.