Merge branch 'master' into revert-39804-revert-39510-update-arrow

Kruglov Pavel 2022-08-08 15:51:29 +02:00 committed by GitHub
commit 704bdd08ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
808 changed files with 6767 additions and 56024 deletions


@ -13,7 +13,6 @@ concurrency:
- master
paths:
- '.github/**'
- 'benchmark/**'
- 'docker/docs/release/**'
- 'docs/**'
- 'utils/list-versions/version_date.tsv'


@ -3348,6 +3348,10 @@ jobs:
###################################### JEPSEN TESTS #########################################
#############################################################################################
Jepsen:
# This is a special test NOT INCLUDED in FinishCheck
# When it's skipped, all dependent tasks will be skipped too.
# DO NOT add it there
if: contains(github.event.pull_request.labels.*.name, 'jepsen-test')
needs: [BuilderBinRelease]
uses: ./.github/workflows/jepsen.yml
@ -3419,7 +3423,6 @@ jobs:
- SharedBuildSmokeTest
- CompatibilityCheck
- IntegrationTestsFlakyCheck
- Jepsen
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository


@ -29,7 +29,7 @@ jobs:
fetch-depth: 0
- name: Generate versions
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }}
run: |
./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv
GID=$(id -g "${UID}")

.gitmodules vendored (2 changes)

@ -201,7 +201,7 @@
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse/boringssl.git
branch = MergeWithUpstream
branch = unknown_branch_from_artur
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse/NuRaft.git


@ -10,9 +10,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 22.7 | ✔️ |
| 22.6 | ✔️ |
| 22.5 | ✔️ |
| 22.4 | ✔️ |
| 22.4 | ❌ |
| 22.3 | ✔️ |
| 22.2 | ❌ |
| 22.1 | ❌ |
@ -57,5 +58,5 @@ As the security issue moves from triage, to identified fix, to release planning
## Public Disclosure Timing
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect report date to disclosure date to be on the order of 7 days.
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the report date to disclosure date to be on the order of 7 days.


@ -2,8 +2,47 @@
#include <cstring>
#include <type_traits>
#include <bit>
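/// Copies `size` bytes from `src` into `dst` in reverse byte order;
/// used below to emulate little-endian layout on big-endian hosts.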
inline void reverseMemcpy(void * dst, const void * src, size_t size)
{
uint8_t * uint_dst = reinterpret_cast<uint8_t *>(dst);
const uint8_t * uint_src = reinterpret_cast<const uint8_t *>(src);
uint_dst += size;
while (size)
{
--uint_dst;
*uint_dst = *uint_src;
++uint_src;
--size;
}
}
template <typename T>
inline T unalignedLoadLE(const void * address)
{
T res {};
if constexpr (std::endian::native == std::endian::little)
memcpy(&res, address, sizeof(res));
else
reverseMemcpy(&res, address, sizeof(res));
return res;
}
template <typename T>
inline void unalignedStoreLE(void * address,
const typename std::enable_if<true, T>::type & src)
{
static_assert(std::is_trivially_copyable_v<T>);
if constexpr (std::endian::native == std::endian::little)
memcpy(address, &src, sizeof(src));
else
reverseMemcpy(address, &src, sizeof(src));
}
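As an aside, a minimal round-trip sketch of how these helpers behave (a hypothetical standalone snippet, not part of this diff; it assumes the helpers above are in scope):
```
#include <cassert>
#include <cstdint>
// Assumes unalignedLoadLE/unalignedStoreLE from the header above are visible.

int main()
{
    char buf[sizeof(uint32_t) + 1] = {};
    // Store at an intentionally misaligned address; the helpers go through
    // memcpy (or reverseMemcpy on big-endian hosts), so this is well-defined.
    unalignedStoreLE<uint32_t>(buf + 1, 0x11223344u);
    assert(unalignedLoadLE<uint32_t>(buf + 1) == 0x11223344u);
    // Regardless of host endianness, buf[1..4] now holds 44 33 22 11.
    assert(static_cast<unsigned char>(buf[1]) == 0x44);
}
```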
template <typename T>
inline T unalignedLoad(const void * address)
{

contrib/NuRaft vendored (2 changes)

@ -1 +1 @@
Subproject commit e1dc47c1cfd529801a8c94a396a3921a71ae3ccf
Subproject commit 1b0af760b3506b8e35b50cb7df098cbad5064ff2

contrib/azure vendored (2 changes)

@ -1 +1 @@
Subproject commit ac4b763d4ca40122275f1497cbdc5451337461d9
Subproject commit ef75afc075fc71fbcd8fe28dcda3794ae265fd1c


@ -1,6 +1,6 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
if (NOT ENABLE_AZURE_BLOB_STORAGE)
if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD)
message(STATUS "Not using Azure blob storage")
return()
endif()

contrib/boringssl vendored (2 changes)

@ -1 +1 @@
Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1
Subproject commit 8061ac62d67953e61b793042e33baf1352e67510


@ -44,6 +44,8 @@
#define HAVE_SETJMP_H
#define HAVE_SYS_STAT_H
#define HAVE_UNISTD_H
#define HAVE_POLL_H
#define HAVE_PTHREAD_H
#define ENABLE_IPV6
#define USE_OPENSSL

contrib/krb5 vendored (2 changes)

@ -1 +1 @@
Subproject commit 5149dea4e2be0f67707383d2682b897c14631374
Subproject commit d879821c7a4c70b0c3ad739d9951d1a2b1903df7

contrib/nats-io vendored (2 changes)

@ -1 +1 @@
Subproject commit 6b2227f36757da090321e2d317569d2bd42c4cc1
Subproject commit 1e2597c54616015077e53a26d56b6bac448eb1b6


@ -18,6 +18,8 @@ elseif(WIN32)
set(NATS_PLATFORM_INCLUDE "apple")
endif()
add_definitions(-DNATS_HAS_TLS)
file(GLOB PS_SOURCES "${NATS_IO_SOURCE_DIR}/${NATS_PLATFORM_INCLUDE}/*.c")
set(SRCS
"${NATS_IO_SOURCE_DIR}/asynccb.c"


@ -29,6 +29,7 @@
"docker/test/util": {
"name": "clickhouse/test-util",
"dependent": [
"docker/packager/binary",
"docker/test/base",
"docker/test/fasttest"
]


@ -1,62 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/binary-builder .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
RUN apt-get update \
&& apt-get install \
apt-transport-https \
apt-utils \
ca-certificates \
dnsutils \
gnupg \
iputils-ping \
lsb-release \
wget \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \
&& apt-get clean
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
build-essential \
ccache \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
fakeroot \
gdb \
git \
gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
moreutils \
ninja-build \
pigz \
rename \
software-properties-common \
tzdata \
nasm \
--yes --no-install-recommends \
&& apt-get clean
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
@ -119,18 +64,18 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir
# FIXME: thread sanitizer is broken in clang-14, we have to build it with clang-13
# NOTE: thread sanitizer is broken in clang-14, we have to build it with clang-15
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-13 main" >> \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-15 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-13 \
clang-tidy-13 \
clang-15 \
clang-tidy-15 \
--yes --no-install-recommends \
&& apt-get clean


@ -3,7 +3,7 @@ set -x -e
exec &> >(ts)
cache_status () {
ccache_status () {
ccache --show-config ||:
ccache --show-stats ||:
}
@ -48,7 +48,7 @@ if [ -n "$MAKE_DEB" ]; then
fi
cache_status
ccache_status
# clear cache stats
ccache --zero-stats ||:
@ -92,7 +92,7 @@ $SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET
ls -la ./programs
cache_status
ccache_status
if [ -n "$MAKE_DEB" ]; then
# No quotes because I want it to expand to nothing if empty.
@ -104,6 +104,7 @@ if [ -n "$MAKE_DEB" ]; then
fi
mv ./programs/clickhouse* /output
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
find . -name '*.so' -print -exec mv '{}' /output \;
find . -name '*.so.*' -print -exec mv '{}' /output \;
@ -178,7 +179,8 @@ then
mv "coverity-scan.tgz" /output
fi
cache_status
ccache_status
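# Evict cache entries not used within the last day so the persistent cache does not grow without bound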
ccache --evict-older-than 1d
if [ "${CCACHE_DEBUG:-}" == "1" ]
then


@ -62,7 +62,7 @@ def pre_build(repo_path: str, env_variables: List[str]):
f"git -C {repo_path} fetch --no-recurse-submodules "
"--no-tags origin master:master"
)
logging.info("Getting master branch for performance artifact: ''%s'", cmd)
logging.info("Getting master branch for performance artifact: '%s'", cmd)
subprocess.check_call(cmd, shell=True)
@ -234,6 +234,7 @@ def parse_env_variables(
if cache:
result.append("CCACHE_DIR=/ccache")
result.append("CCACHE_COMPRESSLEVEL=5")
result.append("CCACHE_BASEDIR=/build")
result.append("CCACHE_NOHASHDIR=true")
result.append("CCACHE_COMPILERCHECK=content")
@ -242,7 +243,6 @@ def parse_env_variables(
# 15G is not enough for tidy build
cache_maxsize = "25G"
result.append(f"CCACHE_MAXSIZE={cache_maxsize}")
# result.append("CCACHE_UMASK=777")
if distcc_hosts:
hosts_with_params = [f"{host}/24,lzo" for host in distcc_hosts] + [
@ -333,7 +333,7 @@ if __name__ == "__main__":
parser.add_argument(
"--compiler",
choices=(
"clang-13", # For TSAN builds, see #39450
"clang-15", # For TSAN builds, see #39450
"clang-14",
"clang-14-darwin",
"clang-14-darwin-aarch64",


@ -57,7 +57,15 @@ do
# check if variable not empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
if [ "$DO_CHOWN" = "1" ]; then
mkdir="mkdir"
else
# DO_CHOWN=0 means that the system does not map the root user to "admin" permissions.
# This mainly happens on NFS mounts, where root==nobody for security reasons.
# Thus mkdir MUST run with the user id/gid, not as nobody, which has zero permissions.
mkdir="/usr/bin/clickhouse su "${USER}:${GROUP}" mkdir"
fi
if ! $mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi


@ -3,59 +3,12 @@
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
fakeroot \
ccache \
curl \
software-properties-common \
--yes --no-install-recommends
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and stays compatible with old systems
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch}
RUN apt-get update \
&& apt-get install \
clang-${LLVM_VERSION} \
debhelper \
devscripts \
gdb \
git \
gperf \
lcov \
llvm-${LLVM_VERSION} \
moreutils \
netbase \
perl \
pigz \
pkg-config \
tzdata \
pv \
nasm \
--yes --no-install-recommends
# Sanitizer options for services (clickhouse-server)


@ -3,83 +3,23 @@
ARG FROM_TAG=latest
FROM clickhouse/test-util:$FROM_TAG
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
fakeroot \
ccache \
curl \
software-properties-common \
--yes --no-install-recommends
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increases deb packaging speed and stays compatible with old systems
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch}
RUN apt-get update \
&& apt-get install \
apt-transport-https \
bash \
brotli \
build-essential \
ca-certificates \
ccache \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
expect \
fakeroot \
gdb \
git \
gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
file \
lsof \
moreutils \
ninja-build \
psmisc \
python3 \
python3-lxml \
python3-pip \
python3-requests \
python3-termcolor \
rename \
software-properties-common \
tzdata \
unixodbc \
file \
nasm \
--yes --no-install-recommends
RUN pip3 install numpy scipy pandas Jinja2
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
RUN mkdir -p /tmp/clickhouse-odbc-tmp \


@ -160,9 +160,8 @@ function run_cmake
"-DENABLE_REPLXX=1"
)
# TODO remove this? we don't use ccache anyway. An option would be to download it
# from S3 simultaneously with cloning.
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
export CCACHE_COMPRESSLEVEL=5
export CCACHE_BASEDIR="$FASTTEST_SOURCE"
export CCACHE_NOHASHDIR=true
export CCACHE_COMPILERCHECK=content
@ -191,6 +190,7 @@ function build
gzip "$FASTTEST_OUTPUT/clickhouse-stripped"
fi
ccache --show-stats ||:
ccache --evict-older-than 1d ||:
)
}


@ -69,6 +69,8 @@ function download
wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
chmod +x clickhouse
# clickhouse may be compressed - run once to decompress
./clickhouse ||:
ln -s ./clickhouse ./clickhouse-server
ln -s ./clickhouse ./clickhouse-client


@ -4,4 +4,8 @@ services:
image: nats
ports:
- "${NATS_EXTERNAL_PORT}:${NATS_INTERNAL_PORT}"
command: "-p 4444 --user click --pass house"
command: "-p 4444 --user click --pass house --tls --tlscert=/etc/certs/server-cert.pem --tlskey=/etc/certs/server-key.pem"
volumes:
- type: bind
source: "${NATS_CERT_DIR}/nats"
target: /etc/certs


@ -1338,6 +1338,8 @@ EOF
set -x
}
# clickhouse may be compressed - run once to decompress
/workspace/right/clickhouse ||:
# Check that local and client are in PATH
clickhouse-local --version > /dev/null
clickhouse-client --version > /dev/null


@ -218,6 +218,12 @@ clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
&& grep -a "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
echo "Get previous release tag"
previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag)
echo $previous_release_tag
stop
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
@ -265,10 +271,6 @@ zgrep -Fa " received signal " /test_output/gdb.log > /dev/null \
echo -e "Backward compatibility check\n"
echo "Get previous release tag"
previous_release_tag=$(clickhouse-client --query="SELECT version()" | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"
git clone https://github.com/ClickHouse/ClickHouse.git --no-tags --progress --branch=$previous_release_tag --no-recurse-submodules --depth=1 previous_release_repository
@ -278,7 +280,6 @@ mkdir previous_release_package_folder
echo $previous_release_tag | download_release_packets && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
# Check if we cloned previous release repository successfully
@ -455,3 +456,5 @@ for core in core.*; do
pigz $core
mv $core.gz /test_output/
done
dmesg -T > /test_output/dmesg.log


@ -77,7 +77,7 @@ def run_func_test(
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], "w")
full_command = "{} {} {} {} {}".format(
full_command = "{} {} {} {} {} --stress".format(
cmd,
get_options(i, backward_compatibility_check),
global_time_limit_option,


@ -1,5 +1,82 @@
# rebuild in #33610
# docker build -t clickhouse/test-util .
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14
RUN apt-get update \
&& apt-get install \
apt-transport-https \
apt-utils \
ca-certificates \
dnsutils \
gnupg \
iputils-ping \
lsb-release \
wget \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \
&& apt-get clean
# initial packages
RUN apt-get update \
&& apt-get install \
bash \
bsdmainutils \
build-essential \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
curl \
fakeroot \
gdb \
git \
gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
moreutils \
nasm \
ninja-build \
pigz \
rename \
software-properties-common \
tzdata \
--yes --no-install-recommends \
&& apt-get clean
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \
&& cd /tmp/ccache \
&& curl -L \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz \
-O https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION.tar.xz.asc \
&& gpg --recv-keys --keyserver hkps://keyserver.ubuntu.com 5A939A71A46792CF57866A51996DDA075594ADB8 \
&& gpg --verify ccache-4.6.1.tar.xz.asc \
&& tar xf ccache-$CCACHE_VERSION.tar.xz \
&& cd /tmp/ccache/ccache-$CCACHE_VERSION \
&& cmake -DCMAKE_INSTALL_PREFIX=/usr \
-DCMAKE_BUILD_TYPE=None \
-DZSTD_FROM_INTERNET=ON \
-DREDIS_STORAGE_BACKEND=OFF \
-Wno-dev \
-B build \
-S . \
&& make VERBOSE=1 -C build \
&& make install -C build \
&& cd / \
&& rm -rf /tmp/ccache
COPY process_functional_tests_result.py /


@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.7.2.15-stable (f843089624e) FIXME as compared to v22.7.1.2484-stable (f4f05ec786a)
#### Bug Fix
* Backported in [#39750](https://github.com/ClickHouse/ClickHouse/issues/39750): Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#39591](https://github.com/ClickHouse/ClickHouse/issues/39591): Fix data race and possible heap-buffer-overflow in Avro format. Closes [#39094](https://github.com/ClickHouse/ClickHouse/issues/39094) Closes [#33652](https://github.com/ClickHouse/ClickHouse/issues/33652). [#39498](https://github.com/ClickHouse/ClickHouse/pull/39498) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#39613](https://github.com/ClickHouse/ClickHouse/issues/39613): Fix bug with maxsplit argument for splitByChar, which was not working correctly. [#39552](https://github.com/ClickHouse/ClickHouse/pull/39552) ([filimonov](https://github.com/filimonov)).
* Backported in [#39792](https://github.com/ClickHouse/ClickHouse/issues/39792): Fix wrong index analysis with tuples and operator `IN`, which could lead to wrong query result. [#39752](https://github.com/ClickHouse/ClickHouse/pull/39752) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#39837](https://github.com/ClickHouse/ClickHouse/issues/39837): Fix `CANNOT_READ_ALL_DATA` exception with `local_filesystem_read_method=pread_threadpool`. This bug affected only Linux kernel version 5.9 and 5.10 according to [man](https://manpages.debian.org/testing/manpages-dev/preadv2.2.en.html#BUGS). [#39800](https://github.com/ClickHouse/ClickHouse/pull/39800) ([Anton Popov](https://github.com/CurtizJ)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Replace MemoryTrackerBlockerInThread to LockMemoryExceptionInThread [#39619](https://github.com/ClickHouse/ClickHouse/pull/39619) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Change mysql-odbc url [#39702](https://github.com/ClickHouse/ClickHouse/pull/39702) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -29,7 +29,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use
## Data Types Support {#data_types-support}
| PostgerSQL | ClickHouse |
| PostgreSQL | ClickHouse |
|------------------|--------------------------------------------------------------|
| DATE | [Date](../../sql-reference/data-types/date.md) |
| TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |


@ -878,8 +878,6 @@ User can assign new big parts to different disks of a [JBOD](https://en.wikipedi
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
This feature is under development and not ready for production. There are known drawbacks such as very low performance.
Configuration markup:
``` xml
<storage_configuration>


@ -22,7 +22,7 @@ Consider using the [sipHash64](#hash_functions-siphash64) function instead.
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -69,7 +69,7 @@ Function [interprets](../../sql-reference/functions/type-conversion-functions.md
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -99,7 +99,7 @@ sipHash128(par1,...)
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned value**
@ -135,7 +135,7 @@ This is a fast non-cryptographic hash function. It uses the CityHash algorithm f
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -275,7 +275,7 @@ These functions use the `Fingerprint64` and `Hash64` methods respectively from a
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -401,7 +401,7 @@ metroHash64(par1, ...)
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -436,7 +436,7 @@ murmurHash2_64(par1, ...)
**Arguments**
Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**
@ -504,7 +504,7 @@ murmurHash3_64(par1, ...)
**Arguments**
Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md).
Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for equal values even if the argument types differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
**Returned Value**


@ -34,7 +34,7 @@ CREATE TABLE table_with_ttl
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH;
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');


@ -29,7 +29,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use
## Supported data types {#data_types-support}
| PostgerSQL | ClickHouse |
| PostgreSQL | ClickHouse |
|------------------|--------------------------------------------------------------|
| DATE | [Date](../../sql-reference/data-types/date.md) |
| TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |


@ -19,10 +19,6 @@ def build(args):
if not args.skip_website:
website.build_website(args)
if not args.skip_website:
website.process_benchmark_results(args)
website.minify_website(args)
redirects.build_static_redirects(args)
@ -74,9 +70,5 @@ if __name__ == "__main__":
new_args = sys.executable + " " + " ".join(new_args)
server = livereload.Server()
server.watch(
args.website_dir + "**/*",
livereload.shell(new_args, cwd="tools", shell=True),
)
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
sys.exit(0)


@ -1,22 +0,0 @@
#!/usr/bin/env bash
#
# README:
# This script deploys ClickHouse website to your personal test subdomain.
#
# Before first use of this script:
# 1) Set up building documentation according to https://github.com/ClickHouse/ClickHouse/tree/master/docs/tools#use-buildpy-use-build-py
# 2) Create https://github.com/GIT_USER/clickhouse.github.io repo (replace GIT_USER with your GitHub login)
# 3) Enable GitHub Pages in settings of this repo
# 4) Add file named CNAME in root of this repo with "GIT_USER-test.clickhouse.com" content (without quotes)
# 5) Send email on address from https://clickhouse.com/#contacts asking to create GIT_USER-test.clickhouse.com domain
#
set -ex
BASE_DIR=$(dirname "$(readlink -f "$0")")
GIT_USER=${GIT_USER:-$USER}
GIT_PROD_URI=git@github.com:${GIT_USER}/clickhouse.github.io.git \
BASE_DOMAIN=${GIT_USER}-test.clickhouse.com \
EXTRA_BUILD_ARGS="${*}" \
CLOUDFLARE_TOKEN="" \
"${BASE_DIR}/release.sh"


@ -1,27 +0,0 @@
#!/usr/bin/env bash
# Creates symlinks to docs in ClickHouse/docs/edit/
# that are easy to open in both languages simultaneously
# for example, with `vim -O docs/edit/my_article/*`
set -ex
BASE_DIR="$(dirname $(readlink -f $0))/.."
DOCS_DIR="${BASE_DIR}"
EDIT_DIR="${BASE_DIR}/edit"
pushd "${DOCS_DIR}/en"
ARTICLES=$(find . -name '*.md' | sed -e 's/\.md$//g' -e 's/^\.\/en\///g')
popd
rm -rf "${EDIT_DIR}" || true
for DOCS_LANG in en ru zh ja fa
do
for ARTICLE in ${ARTICLES}
do
ARTICLE_DIR="${EDIT_DIR}/${ARTICLE}"
mkdir -p $ARTICLE_DIR || true
ln -s "${DOCS_DIR}/${DOCS_LANG}/${ARTICLE}.md" "${ARTICLE_DIR}/${DOCS_LANG}.md"
done
done


@ -36,15 +36,4 @@ then
# Push to GitHub rewriting the existing contents.
# Sometimes it does not work with error message "! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at 42a0f6b6b6c7be56a469441b4bf29685c1cebac3 but expected 520e9b02c0d4678a2a5f41d2f561e6532fb98cc1)"
for _ in {1..10}; do git push --force origin master && break; sleep 5; done
# Turn off logging.
set +x
if [[ -n "${CLOUDFLARE_TOKEN}" ]]
then
sleep 1m
# https://api.cloudflare.com/#zone-purge-files-by-cache-tags,-host-or-prefix
POST_DATA='{"hosts":["clickhouse.com"]}'
curl -X POST "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache" -H "Authorization: Bearer ${CLOUDFLARE_TOKEN}" -H "Content-Type:application/json" --data "${POST_DATA}"
fi
fi


@ -113,7 +113,6 @@ def init_jinja2_filters(env):
env.filters["chunks"] = lambda line: [
line[i : i + chunk_size] for i in range(0, len(line), chunk_size)
]
env.filters["adjust_markdown_html"] = website.adjust_markdown_html
env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime(
d, "%Y-%m-%d"
).strftime("%a, %d %b %Y %H:%M:%S GMT")


@ -5,122 +5,9 @@ import os
import shutil
import subprocess
import bs4
import util
def handle_iframe(iframe, soup):
allowed_domains = ["https://www.youtube.com/"]
illegal_domain = True
iframe_src = iframe.attrs["src"]
for domain in allowed_domains:
if iframe_src.startswith(domain):
illegal_domain = False
break
if illegal_domain:
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
wrapper = soup.new_tag("div")
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
iframe.insert_before(wrapper)
iframe.extract()
wrapper.insert(0, iframe)
if "width" in iframe.attrs:
del iframe.attrs["width"]
if "height" in iframe.attrs:
del iframe.attrs["height"]
iframe.attrs[
"allow"
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
iframe.attrs["class"] = "embed-responsive-item"
iframe.attrs["frameborder"] = "0"
iframe.attrs["allowfullscreen"] = "1"
def adjust_markdown_html(content):
soup = bs4.BeautifulSoup(content, features="html.parser")
for a in soup.find_all("a"):
a_class = a.attrs.get("class")
a_href = a.attrs.get("href")
if a_class and "headerlink" in a_class:
a.string = "\xa0"
if a_href and a_href.startswith("http"):
a.attrs["target"] = "_blank"
for code in soup.find_all("code"):
code_class = code.attrs.get("class")
if code_class:
code.attrs["class"] = code_class + ["syntax"]
else:
code.attrs["class"] = "syntax"
for iframe in soup.find_all("iframe"):
handle_iframe(iframe, soup)
for img in soup.find_all("img"):
if img.attrs.get("alt") == "iframe":
img.name = "iframe"
img.string = ""
handle_iframe(img, soup)
continue
img_class = img.attrs.get("class")
if img_class:
img.attrs["class"] = img_class + ["img-fluid"]
else:
img.attrs["class"] = "img-fluid"
for details in soup.find_all("details"):
for summary in details.find_all("summary"):
if summary.parent != details:
summary.extract()
details.insert(0, summary)
for dd in soup.find_all("dd"):
dd_class = dd.attrs.get("class")
if dd_class:
dd.attrs["class"] = dd_class + ["pl-3"]
else:
dd.attrs["class"] = "pl-3"
for div in soup.find_all("div"):
div_class = div.attrs.get("class")
is_admonition = div_class and "admonition" in div.attrs.get("class")
if is_admonition:
for a in div.find_all("a"):
a_class = a.attrs.get("class")
if a_class:
a.attrs["class"] = a_class + ["alert-link"]
else:
a.attrs["class"] = "alert-link"
for p in div.find_all("p"):
p_class = p.attrs.get("class")
if is_admonition and p_class and ("admonition-title" in p_class):
p.attrs["class"] = p_class + [
"alert-heading",
"display-4",
"text-reset",
"mb-2",
]
if is_admonition:
div.attrs["role"] = "alert"
if ("info" in div_class) or ("note" in div_class):
mode = "alert-primary"
elif ("attention" in div_class) or ("warning" in div_class):
mode = "alert-warning"
elif "important" in div_class:
mode = "alert-danger"
elif "tip" in div_class:
mode = "alert-info"
else:
mode = "alert-secondary"
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
return str(soup)
def build_website(args):
logging.info("Building website")
env = util.init_jinja2_env(args)
@ -145,11 +32,6 @@ def build_website(args):
),
)
shutil.copytree(
os.path.join(args.website_dir, "images"),
os.path.join(args.output_dir, "docs", "images"),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
@ -179,84 +61,3 @@ def build_website(args):
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def get_css_in(args):
return [
f"'{args.website_dir}/css/bootstrap.css'",
f"'{args.website_dir}/css/docsearch.css'",
f"'{args.website_dir}/css/base.css'",
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'",
]
def get_js_in(args):
return [
f"'{args.website_dir}/js/jquery.js'",
f"'{args.website_dir}/js/popper.js'",
f"'{args.website_dir}/js/bootstrap.js'",
f"'{args.website_dir}/js/sentry.js'",
f"'{args.website_dir}/js/base.js'",
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'",
]
def minify_website(args):
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {
"hardware": ["result", "system", "system_full", "kind"],
"versions": ["version", "system"],
}
for benchmark_kind in ["hardware", "versions"]:
results = []
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.info(f"Reading benchmark result from {result_file}")
with open(result_file, "r") as f:
result = json.loads(f.read())
for item in result:
for required_key in required_keys[benchmark_kind]:
assert (
required_key in item
), f'No "{required_key}" in {result_file}'
results += result
results_js = os.path.join(
args.output_dir, "benchmark", benchmark_kind, "results.js"
)
with open(results_js, "w") as f:
data = json.dumps(results)
f.write(f"var results = {data};")


@ -29,7 +29,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
## Supported data types {#data_types-support}
| PostgerSQL | ClickHouse |
| PostgreSQL | ClickHouse |
|------------------|--------------------------------------------------------------|
| DATE | [Date](../../sql-reference/data-types/date.md) |
| TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |


@ -10,7 +10,7 @@ sidebar_position: 10
This two-part meaning has two consequences:
- The only correct way to write "Click**H** house" is with a capital H.
- The only correct way to write "Click**H**ouse" is with a capital H.
- If an abbreviation is needed, use "**CH**". For historical reasons, the abbreviation CK is also popular in China, mainly because one of the earliest talks about ClickHouse in Chinese used this form.
!!! info "Fun fact"


@ -0,0 +1,8 @@
position: 10
label: 'Improving Query Performance'
collapsible: true
collapsed: true
link:
type: generated-index
title: Improving Query Performance
slug: /zh/guides/improving-query-performance


@ -0,0 +1,167 @@
---
sidebar_label: Data Skipping Indexes
sidebar_position: 2
---
# A Deep Dive into ClickHouse Data Skipping Indexes
### Data skipping indexes
Many factors affect ClickHouse query performance. In most scenarios, the critical factor is whether ClickHouse can use the primary key when evaluating the WHERE clause conditions of a query. Accordingly, choosing a primary key that applies to the most common query patterns is essential for table design.
However, no matter how carefully the primary key is tuned, query use cases inevitably arise that cannot use it efficiently. Users often rely on ClickHouse for time-series data, but they frequently want to analyze the same data along other business dimensions, such as customer id, website URL, or product number. In that case, query performance can be considerably worse, because applying the WHERE clause conditions may require a full scan of every value in a column. While ClickHouse is still relatively fast in those circumstances, evaluating millions or billions of individual values makes such "non-indexed" queries execute much more slowly than queries based on the primary key.
In a traditional relational database, one approach to this problem is to attach one or more "secondary" indexes to a table. This is a b-tree structure that lets the database find all matching rows on disk in O(log(n)) time instead of O(n) time (a table scan), where n is the number of rows. However, this type of secondary index does not work for ClickHouse (or other column-oriented databases), because there are no individual rows on disk to add to the index.
Instead, ClickHouse provides a different type of index, which in specific circumstances can significantly improve query speed. These structures are called data skipping indexes, because they enable ClickHouse to skip data blocks that are guaranteed to contain no matching values.
### Basic operation
Data skipping indexes are only available for the MergeTree family of table engines. Each skip index has four main parameters:
- Index name. The index name is used to create the index file in each partition. It is also required as a parameter when dropping or materializing the index.
- Index expression. The index expression is used to calculate the set of values stored in the index. It can be a combination of columns, simple operators, and a subset of functions.
- Type. The index type controls the calculation that determines whether it is possible to skip reading and evaluating each index block.
- GRANULARITY. Each indexed block consists of granules. For example, if the primary table index granularity is 8192 rows and the index GRANULARITY is 4, each indexed "block" covers 32768 rows.
When a user creates a data skipping index, each data part directory of the table contains two additional files:
- skp_idx_{index_name}.idx, which contains the ordered expression values.
- skp_idx_{index_name}.mrk2, which contains the corresponding offsets into the associated data column files.
If, while executing a query and reading the relevant column files, some part of the WHERE clause filtering condition matches the skip index expression, ClickHouse uses the index file data to determine whether each relevant data block must be processed or can be bypassed (assuming the block has not already been excluded by the primary key index). A very simple example: consider the following table loaded with predictable data.
```
CREATE TABLE skip_table
(
my_key UInt64,
my_value UInt64
)
ENGINE MergeTree primary key my_key
SETTINGS index_granularity=8192;
INSERT INTO skip_table SELECT number, intDiv(number,4096) FROM numbers(100000000);
```
When executing a simple query that does not use the primary key, all 100 million entries in the my_value column are scanned:
```
SELECT * FROM skip_table WHERE my_value IN (125, 700)
┌─my_key─┬─my_value─┐
│ 512000 │ 125 │
│ 512001 │ 125 │
│ ... | ... |
└────────┴──────────┘
8192 rows in set. Elapsed: 0.079 sec. Processed 100.00 million rows, 800.10 MB (1.26 billion rows/s., 10.10 GB/s.)
```
Add a basic skip index:
```
ALTER TABLE skip_table ADD INDEX vix my_value TYPE set(100) GRANULARITY 2;
```
Normally, skip indexes apply only to newly inserted data, so merely adding the index does not affect the query above.
To index data that already exists, run:
```
ALTER TABLE skip_table MATERIALIZE INDEX vix;
```
Rerun the query:
```
SELECT * FROM skip_table WHERE my_value IN (125, 700)
┌─my_key─┬─my_value─┐
│ 512000 │ 125 │
│ 512001 │ 125 │
│ ... | ... |
└────────┴──────────┘
8192 rows in set. Elapsed: 0.051 sec. Processed 32.77 thousand rows, 360.45 KB (643.75 thousand rows/s., 7.08 MB/s.)
```
This time ClickHouse did not process 100 million rows (800 MB); it read and analyzed only 32768 rows (360 KB), the data of four granules.
The diagram below shows this more intuitively: how the 4096 rows with a my_value of 125 are read and selected, and how the following rows are skipped without being read from disk:
![Simple Skip](../../../en/guides/improving-query-performance/images/simple_skip.svg)
Users can see detailed information about skip index usage by enabling tracing when executing queries. In clickhouse-client, set send_logs_level:
```
SET send_logs_level='trace';
```
This provides useful debugging information when trying to tune query SQL and table indexes. In the example above, the debug log shows that the skip index dropped all but two granules:
```
<Debug> default.skip_table (933d4b2c-8cea-4bf9-8c93-c56e900eefd1) (SelectExecutor): Index `vix` has dropped 6102/6104 granules.
```
### Skip index types
#### minmax
This lightweight index type requires no parameters. It stores the minimum and maximum values of the index expression for each block (if the expression is a tuple, it stores the values for each member of the tuple separately). This type is ideal for columns that tend to be loosely sorted by value. This index type is usually the cheapest to apply during query processing.
This type of index works only with scalar or tuple expressions; it is never applied to expressions that return an array or map data type.
#### set
This lightweight index type accepts a single parameter, max_size, the size of the value set per block (0 permits an unlimited number of discrete values). The set contains all values in the block (or is empty if the number of values exceeds max_size). This index type works well for columns with low cardinality within each group of granules (essentially, values that are "clumped together") but higher cardinality overall.
The cost, performance, and effectiveness of this index depend on the cardinality within the blocks. If each block contains a large number of unique values, either evaluating the query condition against the large index set is very expensive, or the index is empty because it exceeded max_size and therefore is not applied.
#### Bloom Filter Types
A Bloom filter is a data structure that allows efficient set-membership testing at the cost of a slight chance of false positives. In the skip index use case, false positives are not a significant concern, since the only cost is reading a few unnecessary blocks. The potential for false positives does mean, however, that the indexed expression should be expected to be true; otherwise, valid data may be skipped.
Because Bloom filters handle testing of large numbers of discrete values more efficiently, they are a good fit for conditional expressions that produce many values to test. In particular, a Bloom filter index can be applied to arrays, where every value of the array is tested, and to maps, by converting either the keys or the values to an array using the mapKeys or mapValues function.
There are three data skipping index types based on Bloom filters (the standard false-positive approximation that their parameters control is given after this list):
* The basic **bloom_filter**, which takes a single optional parameter: the allowed "false positive" rate, between 0 and 1 (if unspecified, .025 is used).
* The more specialized **tokenbf_v1**. It takes three parameters for tuning the Bloom filter: (1) the size of the filter in bytes (a larger filter has fewer false positives, at a higher storage cost), (2) the number of hash functions (more hash functions reduce false positives), and (3) the seed of the Bloom filter hash functions. See [here](https://hur.st/bloomfilter/) for more details on how these parameters affect Bloom filter behavior. This index works only with data of type String, FixedString, and Map. The input expression is split into character sequences separated by non-alphanumeric characters. For example, the column value `This is a candidate for a "full text" search` is split into `This` `is` `a` `candidate` `for` `full` `text` `search`. It is intended for searching for words and other values within longer strings with LIKE, EQUALS, IN, hasToken() and similar. For example, one possible use is searching for a small number of class names or line numbers in a column of unstructured application log lines.
* The more specialized **ngrambf_v1**. This index works the same way as tokenbf_v1, but takes one additional parameter before the Bloom filter settings: the size of the ngrams to index. An ngram is any string of length n, so if n is 4, `A short string` is split into `A sh`, ` sho`, `shor`, `hort`, `ort s`, `or st`, `r str`, ` stri`, `trin`, `ring`. This index can also be useful for text searches, especially for languages without word breaks, such as Chinese.
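For reference, the false-positive rate that these parameters trade off follows the standard Bloom filter approximation (the quantity computed by the calculator linked above):

$$p \approx \left(1 - e^{-kn/m}\right)^{k}$$

where $m$ is the filter size in bits, $k$ the number of hash functions, and $n$ the number of distinct values inserted; for fixed $m$ and $n$, choosing $k \approx (m/n)\ln 2$ minimizes $p$.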
### Skip index functions
The core purpose of data skipping indexes is to limit the amount of data analyzed by popular queries. Given the analytic nature of ClickHouse data, the pattern of such queries in most cases includes functional expressions. Accordingly, skip indexes must interact correctly with common functions to be efficient. This can happen when:
* data is inserted and the index is defined as a functional expression (the result of the expression is stored in the index files), or
* the query is processed and the expression is applied to the stored index values to determine whether the block can be excluded.
The list of functions supported by each type of skip index is available [here](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree/#functions-support). In general, set indexes and Bloom-filter-based indexes (another kind of set index) are both unordered and therefore do not work with ranges. In contrast, minmax indexes work particularly well with ranges, since determining whether ranges intersect is very fast. The effectiveness of the partial-match functions LIKE, startsWith, endsWith, and hasToken depends on the index type used, the index expression, and the particular shape of the data.
### Skip index settings
There are two settings that apply to skip indexes:
* **use_skip_indexes** (0 or 1, default 1). Not all queries can use skip indexes efficiently. If a particular filtering condition is likely to match most granules, applying the data skipping index incurs an unnecessary, and sometimes significant, cost. Set the value to 0 for queries that are unlikely to benefit from any skip index.
* **force_data_skipping_indexes** (a comma-separated list of index names). This setting can protect against certain kinds of inefficient queries. In circumstances where querying a table is too expensive unless a skip index is used, setting this to one or more index names will return an exception for any query that does not use a listed index. This prevents badly written queries from consuming server resources.
### Best practices
Skip indexes are not intuitive, especially for users coming from the RDBMS world, who are used to secondary row-based indexes, or from document stores with inverted indexes. To get any benefit, applying a ClickHouse data skipping index must avoid enough granule reads to offset the cost of calculating the index. The key point is that if a value occurs even once in an indexed block, the whole block must be read into memory and evaluated, and the index cost has been incurred for nothing.
Consider the following data distribution:
![Bad Skip!](../../../en/guides/improving-query-performance/images/bad_skip_1.svg)
Assume the primary/order-by key is timestamp, and there is an index on visitor_id. Consider the following query:
`SELECT timestamp, url FROM table WHERE visitor_id = 1001`
A traditional secondary index would be very advantageous for this data distribution. Instead of reading all 32768 rows to find the 5 rows with the requested visitor_id, the secondary index would contain just five row locations, and only those five rows would be read from disk. With a ClickHouse data skipping index the situation is exactly the opposite: all 32768 values in the visitor_id column will be tested, whatever the type of the skip index.
Accordingly, the impulse to speed up ClickHouse queries by simply adding an index to key columns is often wrong. This advanced feature should be used only after investigating other alternatives, such as modifying the primary key (see [How to Pick a Primary Key](../improving-query-performance/sparse-primary-indexes.md)), using projections, or using materialized views. Even when a skip index is appropriate, careful tuning of both the index and the table is often necessary.
In most cases, a useful skip index requires a strong correlation between the primary key and the targeted non-primary column/expression. If there is no correlation (as in the diagram above), the chance that at least one row in a block of several thousand values satisfies the filtering condition is high, and only a few blocks will be skipped. Conversely, if a range of values of the primary key (like time of day) is strongly correlated with the values in the potential index column (like television viewer age), then a minmax type index is likely beneficial. Note that this correlation can be increased when inserting data, either by including additional columns in the sort/ORDER BY key, or by batching inserts so that values associated with the primary key are grouped at insert time. For example, all events for a particular site_id could be grouped and inserted together by the ingest process, even though the primary key is a timestamp covering events from a large number of sites; this produces many granules containing only a few site ids, so many blocks can be skipped when searching by a specific site_id value.
Another good candidate for a skip index is a high-cardinality expression where any single value is relatively sparse in the data. One possible example is an observability platform that tracks error codes in API requests. Certain error codes, while rare in the data, may be especially important for searches. A set skip index on the error_code column would allow bypassing the vast majority of blocks that contain no errors, significantly improving error-focused queries.
Finally, the key best practice is to test, test, and test again. Unlike b-tree secondary indexes or inverted indexes used for searching documents, skip index behavior is not easy to predict. Adding them to a table incurs a significant cost both at data ingestion and for queries that, for various reasons, cannot benefit from the index. They should always be tested on real-world data, and testing should include variations of type, granularity size, and other parameters. Testing often exposes pitfalls that cannot be found by reasoning alone.
File diff suppressed because it is too large


@ -26,7 +26,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
if [ -x "/bin/systemctl" ] && [ -f /lib/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
@ -44,4 +44,16 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
fi
fi
fi
# /etc/systemd/system/clickhouse-server.service shouldn't be distributed by the package, but it was
# here we delete the service file if it was from our package
if [ -f /etc/systemd/system/clickhouse-server.service ]; then
SHA256=$(sha256sum /etc/systemd/system/clickhouse-server.service | cut -d' ' -f1)
for ref_sum in 7769a14773e811a56f67fd70f7960147217f5e68f746010aec96722e24d289bb 22890012047ea84fbfcebd6e291fe2ef2185cbfdd94a0294e13c8bf9959f58f8 b7790ae57156663c723f92e75ac2508453bf0a7b7e8313bb8081da99e5e88cd3 d1dcc1dbe92dab3ae17baa395f36abf1876b4513df272bf021484923e0111eef ac29ddd32a02eb31670bf5f0018c5d8a3cc006ca7ea572dcf717cb42310dcad7 c62d23052532a70115414833b500b266647d3924eb006a6f3eb673ff0d55f8fa b6b200ffb517afc2b9cf9e25ad8a4afdc0dad5a045bddbfb0174f84cc5a959ed; do
if [ "$SHA256" = "$ref_sum" ]; then
rm /etc/systemd/system/clickhouse-server.service
break
fi
done
fi
fi


@ -18,7 +18,12 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
if (SPLIT_SHARED_LIBRARIES)
# Don't create self-extracting clickhouse for split build
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
endif ()
# https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/
option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL})
@ -44,15 +49,13 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
# https://clickhouse.com/docs/en/operations/utilities/odbc-bridge/
# TODO Also needs NANODBC.
if (ENABLE_ODBC AND NOT USE_MUSL)
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver" ${ENABLE_CLICKHOUSE_ALL})
else ()
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver" OFF)
endif ()
if (NOT USE_MUSL)
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source"
${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Library dictionary source" ${ENABLE_CLICKHOUSE_ALL})
endif ()
# https://presentations.clickhouse.com/matemarketing_2020/


@ -3,6 +3,7 @@ package database
import (
"database/sql"
"fmt"
"net/url"
"strings"
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data"
@ -17,7 +18,7 @@ type ClickhouseNativeClient struct {
func NewNativeClient(host string, port uint16, username string, password string) (*ClickhouseNativeClient, error) {
// debug output ?debug=true
connection, err := sql.Open("clickhouse", fmt.Sprintf("clickhouse://%s:%s@%s:%d/", username, password, host, port))
connection, err := sql.Open("clickhouse", fmt.Sprintf("clickhouse://%s:%s@%s:%d/", url.QueryEscape(username), url.QueryEscape(password), host, port))
if err != nil {
return &ClickhouseNativeClient{}, err
}


@ -1,13 +1,13 @@
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
LibraryInterface.cpp
ExternalDictionaryLibraryAPI.cpp
ExternalDictionaryLibraryHandler.cpp
ExternalDictionaryLibraryHandlerFactory.cpp
LibraryBridge.cpp
Handlers.cpp
HandlerFactory.cpp
SharedLibraryHandler.cpp
SharedLibraryHandlerFactory.cpp
LibraryBridgeHandlerFactory.cpp
LibraryBridgeHandlers.cpp
library-bridge.cpp
)
if (OS_LINUX)


@ -1,4 +1,4 @@
#include "LibraryInterface.h"
#include "ExternalDictionaryLibraryAPI.h"
#include <Common/logger_useful.h>
@ -7,24 +7,7 @@ namespace
const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal";
}
namespace ClickHouseLibrary
{
std::string_view LIBRARY_CREATE_NEW_FUNC_NAME = "ClickHouseDictionary_v3_libNew";
std::string_view LIBRARY_CLONE_FUNC_NAME = "ClickHouseDictionary_v3_libClone";
std::string_view LIBRARY_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_libDelete";
std::string_view LIBRARY_DATA_NEW_FUNC_NAME = "ClickHouseDictionary_v3_dataNew";
std::string_view LIBRARY_DATA_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_dataDelete";
std::string_view LIBRARY_LOAD_ALL_FUNC_NAME = "ClickHouseDictionary_v3_loadAll";
std::string_view LIBRARY_LOAD_IDS_FUNC_NAME = "ClickHouseDictionary_v3_loadIds";
std::string_view LIBRARY_LOAD_KEYS_FUNC_NAME = "ClickHouseDictionary_v3_loadKeys";
std::string_view LIBRARY_IS_MODIFIED_FUNC_NAME = "ClickHouseDictionary_v3_isModified";
std::string_view LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME = "ClickHouseDictionary_v3_supportsSelectiveLoad";
void log(LogLevel level, CString msg)
void ExternalDictionaryLibraryAPI::log(LogLevel level, CString msg)
{
auto & logger = Poco::Logger::get(DICT_LOGGER_NAME);
switch (level)
@ -63,5 +46,3 @@ void log(LogLevel level, CString msg)
break;
}
}
}


@ -0,0 +1,106 @@
#pragma once
#include <cstdint>
#include <string>
#define CLICKHOUSE_DICTIONARY_LIBRARY_API 1
struct ExternalDictionaryLibraryAPI
{
using CString = const char *;
using ColumnName = CString;
using ColumnNames = ColumnName[];
struct CStrings
{
CString * data = nullptr;
uint64_t size = 0;
};
struct VectorUInt64
{
const uint64_t * data = nullptr;
uint64_t size = 0;
};
struct ColumnsUInt64
{
VectorUInt64 * data = nullptr;
uint64_t size = 0;
};
struct Field
{
const void * data = nullptr;
uint64_t size = 0;
};
struct Row
{
const Field * data = nullptr;
uint64_t size = 0;
};
struct Table
{
const Row * data = nullptr;
uint64_t size = 0;
uint64_t error_code = 0; // 0 = ok; !0 = error, with message in error_string
const char * error_string = nullptr;
};
enum LogLevel
{
FATAL = 1,
CRITICAL,
ERROR,
WARNING,
NOTICE,
INFORMATION,
DEBUG,
TRACE,
};
static void log(LogLevel level, CString msg);
using LibraryContext = void *;
using LibraryLoggerFunc = void (*)(LogLevel, CString /* message */);
using LibrarySettings = CStrings *;
using LibraryData = void *;
using RawClickHouseLibraryTable = void *;
/// Can be safely cast into const Table * with static_cast<const ClickHouseLibrary::Table *>
using RequestedColumnsNames = CStrings *;
using RequestedIds = const VectorUInt64 *;
using RequestedKeys = Table *;
using LibraryNewFunc = LibraryContext (*)(LibrarySettings, LibraryLoggerFunc);
static constexpr const char * LIBRARY_CREATE_NEW_FUNC_NAME = "ClickHouseDictionary_v3_libNew";
using LibraryCloneFunc = LibraryContext (*)(LibraryContext);
static constexpr const char * LIBRARY_CLONE_FUNC_NAME = "ClickHouseDictionary_v3_libClone";
using LibraryDeleteFunc = void (*)(LibraryContext);
static constexpr const char * LIBRARY_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_libDelete";
using LibraryDataNewFunc = LibraryData (*)(LibraryContext);
static constexpr const char * LIBRARY_DATA_NEW_FUNC_NAME = "ClickHouseDictionary_v3_dataNew";
using LibraryDataDeleteFunc = void (*)(LibraryContext, LibraryData);
static constexpr const char * LIBRARY_DATA_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_dataDelete";
using LibraryLoadAllFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedColumnsNames);
static constexpr const char * LIBRARY_LOAD_ALL_FUNC_NAME = "ClickHouseDictionary_v3_loadAll";
using LibraryLoadIdsFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedColumnsNames, RequestedIds);
static constexpr const char * LIBRARY_LOAD_IDS_FUNC_NAME = "ClickHouseDictionary_v3_loadIds";
/// There are no requested column names for load keys func
using LibraryLoadKeysFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedKeys);
static constexpr const char * LIBRARY_LOAD_KEYS_FUNC_NAME = "ClickHouseDictionary_v3_loadKeys";
using LibraryIsModifiedFunc = bool (*)(LibraryContext, LibrarySettings);
static constexpr const char * LIBRARY_IS_MODIFIED_FUNC_NAME = "ClickHouseDictionary_v3_isModified";
using LibrarySupportsSelectiveLoadFunc = bool (*)(LibraryContext, LibrarySettings);
static constexpr const char * LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME = "ClickHouseDictionary_v3_supportsSelectiveLoad";
};
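For orientation, a hypothetical minimal external library exporting just the lifecycle symbols declared above might look like the sketch below (illustrative only, not code from this commit; a real dictionary library must also export the load functions):
```
#include "ExternalDictionaryLibraryAPI.h"

// Toy per-instance state for this hypothetical library.
struct ToyContext { int unused = 0; };

extern "C" void * ClickHouseDictionary_v3_libNew(
    ExternalDictionaryLibraryAPI::LibrarySettings /*settings*/,
    ExternalDictionaryLibraryAPI::LibraryLoggerFunc logger)
{
    if (logger)
        logger(ExternalDictionaryLibraryAPI::INFORMATION, "toy library created");
    return new ToyContext;
}

extern "C" void * ClickHouseDictionary_v3_libClone(void * ctx)
{
    // Share-nothing clone; a real library might copy settings or state here.
    return new ToyContext(*static_cast<ToyContext *>(ctx));
}

extern "C" void ClickHouseDictionary_v3_libDelete(void * ctx)
{
    delete static_cast<ToyContext *>(ctx);
}
```
The bridge resolves these symbols at runtime (see the tryGet calls in the handler below), so the exported names must match the LIBRARY_*_FUNC_NAME constants exactly.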


@ -0,0 +1,214 @@
#include "ExternalDictionaryLibraryHandler.h"
#include <base/scope_guard.h>
#include <base/bit_cast.h>
#include <base/find_symbols.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int EXTERNAL_LIBRARY_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
ExternalDictionaryLibraryHandler::ExternalDictionaryLibraryHandler(
const std::string & library_path_,
const std::vector<std::string> & library_settings,
const Block & sample_block_,
const std::vector<std::string> & attributes_names_)
: library_path(library_path_)
, sample_block(sample_block_)
, attributes_names(attributes_names_)
{
library = std::make_shared<SharedLibrary>(library_path);
settings_holder = std::make_shared<CStringsHolder>(CStringsHolder(library_settings));
auto lib_new = library->tryGet<ExternalDictionaryLibraryAPI::LibraryNewFunc>(ExternalDictionaryLibraryAPI::LIBRARY_CREATE_NEW_FUNC_NAME);
if (lib_new)
lib_data = lib_new(&settings_holder->strings, ExternalDictionaryLibraryAPI::log);
else
throw Exception("Method extDict_libNew failed", ErrorCodes::EXTERNAL_LIBRARY_ERROR);
}
ExternalDictionaryLibraryHandler::ExternalDictionaryLibraryHandler(const ExternalDictionaryLibraryHandler & other)
: library_path{other.library_path}
, sample_block{other.sample_block}
, attributes_names{other.attributes_names}
, library{other.library}
, settings_holder{other.settings_holder}
{
auto lib_clone = library->tryGet<ExternalDictionaryLibraryAPI::LibraryCloneFunc>(ExternalDictionaryLibraryAPI::LIBRARY_CLONE_FUNC_NAME);
if (lib_clone)
{
lib_data = lib_clone(other.lib_data);
}
else
{
auto lib_new = library->tryGet<ExternalDictionaryLibraryAPI::LibraryNewFunc>(ExternalDictionaryLibraryAPI::LIBRARY_CREATE_NEW_FUNC_NAME);
if (lib_new)
lib_data = lib_new(&settings_holder->strings, ExternalDictionaryLibraryAPI::log);
}
}
ExternalDictionaryLibraryHandler::~ExternalDictionaryLibraryHandler()
{
auto lib_delete = library->tryGet<ExternalDictionaryLibraryAPI::LibraryDeleteFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DELETE_FUNC_NAME);
if (lib_delete)
lib_delete(lib_data);
}
bool ExternalDictionaryLibraryHandler::isModified()
{
auto func_is_modified = library->tryGet<ExternalDictionaryLibraryAPI::LibraryIsModifiedFunc>(ExternalDictionaryLibraryAPI::LIBRARY_IS_MODIFIED_FUNC_NAME);
if (func_is_modified)
return func_is_modified(lib_data, &settings_holder->strings);
return true;
}
bool ExternalDictionaryLibraryHandler::supportsSelectiveLoad()
{
auto func_supports_selective_load = library->tryGet<ExternalDictionaryLibraryAPI::LibrarySupportsSelectiveLoadFunc>(ExternalDictionaryLibraryAPI::LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME);
if (func_supports_selective_load)
return func_supports_selective_load(lib_data, &settings_holder->strings);
return true;
}
Block ExternalDictionaryLibraryHandler::loadAll()
{
auto columns_holder = std::make_unique<ExternalDictionaryLibraryAPI::CString[]>(attributes_names.size());
ExternalDictionaryLibraryAPI::CStrings columns{static_cast<decltype(ExternalDictionaryLibraryAPI::CStrings::data)>(columns_holder.get()), attributes_names.size()};
for (size_t i = 0; i < attributes_names.size(); ++i)
columns.data[i] = attributes_names[i].c_str();
auto load_all_func = library->get<ExternalDictionaryLibraryAPI::LibraryLoadAllFunc>(ExternalDictionaryLibraryAPI::LIBRARY_LOAD_ALL_FUNC_NAME);
auto data_new_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataNewFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataDeleteFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_DELETE_FUNC_NAME);
ExternalDictionaryLibraryAPI::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data = load_all_func(data_ptr, &settings_holder->strings, &columns);
return dataToBlock(data);
}
Block ExternalDictionaryLibraryHandler::loadIds(const std::vector<uint64_t> & ids)
{
const ExternalDictionaryLibraryAPI::VectorUInt64 ids_data{bit_cast<decltype(ExternalDictionaryLibraryAPI::VectorUInt64::data)>(ids.data()), ids.size()};
auto columns_holder = std::make_unique<ExternalDictionaryLibraryAPI::CString[]>(attributes_names.size());
ExternalDictionaryLibraryAPI::CStrings columns_pass{static_cast<decltype(ExternalDictionaryLibraryAPI::CStrings::data)>(columns_holder.get()), attributes_names.size()};
auto load_ids_func = library->get<ExternalDictionaryLibraryAPI::LibraryLoadIdsFunc>(ExternalDictionaryLibraryAPI::LIBRARY_LOAD_IDS_FUNC_NAME);
auto data_new_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataNewFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataDeleteFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_DELETE_FUNC_NAME);
ExternalDictionaryLibraryAPI::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data = load_ids_func(data_ptr, &settings_holder->strings, &columns_pass, &ids_data);
return dataToBlock(data);
}
Block ExternalDictionaryLibraryHandler::loadKeys(const Columns & key_columns)
{
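/// Note: each ExternalDictionaryLibraryAPI::Row built below wraps a single key
/// column (one Field per key value), so request_cols is effectively a
/// column-major view over the requested keys.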
auto holder = std::make_unique<ExternalDictionaryLibraryAPI::Row[]>(key_columns.size());
std::vector<std::unique_ptr<ExternalDictionaryLibraryAPI::Field[]>> column_data_holders;
for (size_t i = 0; i < key_columns.size(); ++i)
{
auto cell_holder = std::make_unique<ExternalDictionaryLibraryAPI::Field[]>(key_columns[i]->size());
for (size_t j = 0; j < key_columns[i]->size(); ++j)
{
auto data_ref = key_columns[i]->getDataAt(j);
cell_holder[j] = ExternalDictionaryLibraryAPI::Field{
.data = static_cast<const void *>(data_ref.data),
.size = data_ref.size};
}
holder[i] = ExternalDictionaryLibraryAPI::Row{
.data = static_cast<ExternalDictionaryLibraryAPI::Field *>(cell_holder.get()),
.size = key_columns[i]->size()};
column_data_holders.push_back(std::move(cell_holder));
}
ExternalDictionaryLibraryAPI::Table request_cols{
.data = static_cast<ExternalDictionaryLibraryAPI::Row *>(holder.get()),
.size = key_columns.size()};
auto load_keys_func = library->get<ExternalDictionaryLibraryAPI::LibraryLoadKeysFunc>(ExternalDictionaryLibraryAPI::LIBRARY_LOAD_KEYS_FUNC_NAME);
auto data_new_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataNewFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ExternalDictionaryLibraryAPI::LibraryDataDeleteFunc>(ExternalDictionaryLibraryAPI::LIBRARY_DATA_DELETE_FUNC_NAME);
ExternalDictionaryLibraryAPI::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data = load_keys_func(data_ptr, &settings_holder->strings, &request_cols);
return dataToBlock(data);
}
Block ExternalDictionaryLibraryHandler::dataToBlock(ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data)
{
if (!data)
throw Exception("LibraryDictionarySource: No data returned", ErrorCodes::EXTERNAL_LIBRARY_ERROR);
const auto * columns_received = static_cast<const ExternalDictionaryLibraryAPI::Table *>(data);
if (columns_received->error_code)
throw Exception(
"LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " + (columns_received->error_string ? columns_received->error_string : ""),
ErrorCodes::EXTERNAL_LIBRARY_ERROR);
MutableColumns columns = sample_block.cloneEmptyColumns();
for (size_t col_n = 0; col_n < columns_received->size; ++col_n)
{
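/// The received table is row-major: data[col_n] is one returned row whose fields
/// map 1:1 onto the result columns, hence the columns[row_n] indexing below.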
if (columns.size() != columns_received->data[col_n].size)
throw Exception(
"LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) + ", must be " + std::to_string(columns.size()),
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n)
{
const auto & field = columns_received->data[col_n].data[row_n];
if (!field.data)
{
/// sample_block contains null_value (from config) inside corresponding column
const auto & col = sample_block.getByPosition(row_n);
columns[row_n]->insertFrom(*(col.column), 0);
}
else
{
const auto & size = field.size;
columns[row_n]->insertData(static_cast<const char *>(field.data), size);
}
}
}
return sample_block.cloneWithColumns(std::move(columns));
}
}

View File

@ -2,7 +2,7 @@
#include <Common/SharedLibrary.h>
#include <Common/logger_useful.h>
#include "LibraryUtils.h"
#include "ExternalDictionaryLibraryUtils.h"
namespace DB
@ -10,21 +10,21 @@ namespace DB
/// A class that manages all operations with library dictionary.
/// Every library dictionary source has its own object of this class, accessed by UUID.
class SharedLibraryHandler
class ExternalDictionaryLibraryHandler
{
public:
SharedLibraryHandler(
ExternalDictionaryLibraryHandler(
const std::string & library_path_,
const std::vector<std::string> & library_settings,
const Block & sample_block_,
const std::vector<std::string> & attributes_names_);
SharedLibraryHandler(const SharedLibraryHandler & other);
ExternalDictionaryLibraryHandler(const ExternalDictionaryLibraryHandler & other);
SharedLibraryHandler & operator=(const SharedLibraryHandler & other) = delete;
ExternalDictionaryLibraryHandler & operator=(const ExternalDictionaryLibraryHandler & other) = delete;
~SharedLibraryHandler();
~ExternalDictionaryLibraryHandler();
Block loadAll();
@ -39,7 +39,7 @@ public:
const Block & getSampleBlock() { return sample_block; }
private:
Block dataToBlock(const ClickHouseLibrary::RawClickHouseLibraryTable data);
Block dataToBlock(ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data);
std::string library_path;
const Block sample_block;
@ -50,6 +50,6 @@ private:
void * lib_data;
};
using SharedLibraryHandlerPtr = std::shared_ptr<SharedLibraryHandler>;
using SharedLibraryHandlerPtr = std::shared_ptr<ExternalDictionaryLibraryHandler>;
}

View File

@ -0,0 +1,62 @@
#include "ExternalDictionaryLibraryHandlerFactory.h"
namespace DB
{
SharedLibraryHandlerPtr ExternalDictionaryLibraryHandlerFactory::get(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
auto library_handler = library_handlers.find(dictionary_id);
if (library_handler != library_handlers.end())
return library_handler->second;
return nullptr;
}
void ExternalDictionaryLibraryHandlerFactory::create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names)
{
std::lock_guard lock(mutex);
if (!library_handlers.contains(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<ExternalDictionaryLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
}
bool ExternalDictionaryLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
if (from_library_handler == library_handlers.end())
return false;
/// extDict_libClone method will be called in copy constructor
library_handlers[to_dictionary_id] = std::make_shared<ExternalDictionaryLibraryHandler>(*from_library_handler->second);
return true;
}
bool ExternalDictionaryLibraryHandlerFactory::remove(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
/// extDict_libDelete is called in destructor.
return library_handlers.erase(dictionary_id);
}
ExternalDictionaryLibraryHandlerFactory & ExternalDictionaryLibraryHandlerFactory::instance()
{
static ExternalDictionaryLibraryHandlerFactory instance;
return instance;
}
}
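A usage sketch of the factory lifecycle (a fragment, not from this commit; dictionary_id, cloned_id, settings, sample_block and attribute_names are placeholders supplied by the caller):
auto & factory = DB::ExternalDictionaryLibraryHandlerFactory::instance();
factory.create(dictionary_id, "/path/to/libdict.so", settings, sample_block, attribute_names);
if (auto handler = factory.get(dictionary_id))
    Block block = handler->loadAll();        /// dispatches to the library's loadAll entry point
factory.clone(dictionary_id, cloned_id);     /// extDict_libClone runs via the copy constructor
factory.remove(dictionary_id);               /// extDict_libDelete runs in the handler's destructor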

View File

@ -1,6 +1,6 @@
#pragma once
#include "SharedLibraryHandler.h"
#include "ExternalDictionaryLibraryHandler.h"
#include <base/defines.h>
#include <unordered_map>
@ -11,11 +11,11 @@ namespace DB
{
/// Each library dictionary source has unique UUID. When clone() method is called, a new UUID is generated.
/// There is a unique mapping from diciotnary UUID to sharedLibraryHandler.
class SharedLibraryHandlerFactory final : private boost::noncopyable
/// There is a unique mapping from dictionary UUID to sharedLibraryHandler.
class ExternalDictionaryLibraryHandlerFactory final : private boost::noncopyable
{
public:
static SharedLibraryHandlerFactory & instance();
static ExternalDictionaryLibraryHandlerFactory & instance();
SharedLibraryHandlerPtr get(const std::string & dictionary_id);

View File

@ -5,7 +5,7 @@
#include <base/bit_cast.h>
#include <base/range.h>
#include "LibraryInterface.h"
#include "ExternalDictionaryLibraryAPI.h"
namespace DB
@ -22,7 +22,7 @@ public:
strings_holder = strings_pass;
strings.size = strings_holder.size();
ptr_holder = std::make_unique<ClickHouseLibrary::CString[]>(strings.size);
ptr_holder = std::make_unique<ExternalDictionaryLibraryAPI::CString[]>(strings.size);
strings.data = ptr_holder.get();
size_t i = 0;
@ -33,10 +33,10 @@ public:
}
}
ClickHouseLibrary::CStrings strings; // will pass pointer to lib
ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib
private:
std::unique_ptr<ClickHouseLibrary::CString[]> ptr_holder = nullptr;
std::unique_ptr<ExternalDictionaryLibraryAPI::CString[]> ptr_holder = nullptr;
Container strings_holder;
};

View File

@ -1,23 +0,0 @@
#include "HandlerFactory.h"
#include <Poco/Net/HTTPServerRequest.h>
#include <Server/HTTP/HTMLForm.h>
#include "Handlers.h"
namespace DB
{
std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHandler(const HTTPServerRequest & request)
{
Poco::URI uri{request.getURI()};
LOG_DEBUG(log, "Request URI: {}", uri.toString());
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return std::make_unique<LibraryExistsHandler>(keep_alive_timeout, getContext());
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
return std::make_unique<LibraryRequestHandler>(keep_alive_timeout, getContext());
return nullptr;
}
}

View File

@ -1,37 +0,0 @@
#pragma once
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandlerFactory.h>
#include <Common/logger_useful.h>
namespace DB
{
class SharedLibraryHandler;
using SharedLibraryHandlerPtr = std::shared_ptr<SharedLibraryHandler>;
/// Factory for '/ping', '/' handlers.
class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
LibraryBridgeHandlerFactory(
const std::string & name_,
size_t keep_alive_timeout_,
ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
}
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
private:
Poco::Logger * log;
std::string name;
size_t keep_alive_timeout;
};
}

View File

@ -1,6 +1,5 @@
#include "LibraryBridge.h"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseLibraryBridge(int argc, char ** argv)
{
DB::LibraryBridge app;
@ -15,3 +14,18 @@ int mainEntryClickHouseLibraryBridge(int argc, char ** argv)
return code ? code : 1;
}
}
namespace DB
{
std::string LibraryBridge::bridgeName() const
{
return "LibraryBridge";
}
LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const
{
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context);
}
}

View File

@ -2,7 +2,7 @@
#include <Interpreters/Context.h>
#include <Bridge/IBridge.h>
#include "HandlerFactory.h"
#include "LibraryBridgeHandlerFactory.h"
namespace DB
@ -12,15 +12,8 @@ class LibraryBridge : public IBridge
{
protected:
std::string bridgeName() const override
{
return "LibraryBridge";
}
HandlerFactoryPtr getHandlerFactoryPtr(ContextPtr context) const override
{
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory-factory", keep_alive_timeout, context);
}
std::string bridgeName() const override;
HandlerFactoryPtr getHandlerFactoryPtr(ContextPtr context) const override;
};
}

View File

@ -0,0 +1,40 @@
#include "LibraryBridgeHandlerFactory.h"
#include <Poco/Net/HTTPServerRequest.h>
#include <Server/HTTP/HTMLForm.h>
#include "LibraryBridgeHandlers.h"
namespace DB
{
LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
const std::string & name_,
size_t keep_alive_timeout_,
ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
}
std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHandler(const HTTPServerRequest & request)
{
Poco::URI uri{request.getURI()};
LOG_DEBUG(log, "Request URI: {}", uri.toString());
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
{
if (uri.getPath() == "/extdict_ping")
return std::make_unique<LibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
}
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
{
if (uri.getPath() == "/extdict_request")
return std::make_unique<LibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
}
return nullptr;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandlerFactory.h>
#include <Common/logger_useful.h>
namespace DB
{
class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
LibraryBridgeHandlerFactory(
const std::string & name_,
size_t keep_alive_timeout_,
ContextPtr context_);
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
private:
Poco::Logger * log;
const std::string name;
const size_t keep_alive_timeout;
};
}

View File

@ -1,5 +1,5 @@
#include "Handlers.h"
#include "SharedLibraryHandlerFactory.h"
#include "LibraryBridgeHandlers.h"
#include "ExternalDictionaryLibraryHandlerFactory.h"
#include <Formats/FormatFactory.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
@ -78,8 +78,14 @@ static void writeData(Block data, OutputFormatPtr format)
executor.execute();
}
LibraryBridgeRequestHandler::LibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("LibraryBridgeRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void LibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
@ -104,8 +110,8 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
try
{
bool lib_new = (method == "libNew");
if (method == "libClone")
bool lib_new = (method == "extDict_libNew");
if (method == "extDict_libClone")
{
if (!params.has("from_dictionary_id"))
{
@ -115,7 +121,7 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
std::string from_dictionary_id = params.get("from_dictionary_id");
bool cloned = false;
cloned = SharedLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
cloned = ExternalDictionaryLibraryHandlerFactory::instance().clone(from_dictionary_id, dictionary_id);
if (cloned)
{
@ -123,7 +129,7 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
else
{
LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call libNew instead", from_dictionary_id);
LOG_TRACE(log, "Cannot clone from dictionary with id: {}, will call extDict_libNew instead", from_dictionary_id);
lib_new = true;
}
}
@ -138,13 +144,14 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
return;
}
std::string library_path = params.get("library_path");
if (!params.has("library_settings"))
{
processError(response, "No 'library_settings' in request URL");
return;
}
std::string library_path = params.get("library_path");
const auto & settings_string = params.get("library_settings");
LOG_DEBUG(log, "Parsing library settings from binary string");
@ -197,12 +204,12 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
LOG_DEBUG(log, "Dictionary sample block with null values: {}", sample_block_with_nulls.dumpStructure());
SharedLibraryHandlerFactory::instance().create(dictionary_id, library_path, library_settings, sample_block_with_nulls, attributes_names);
ExternalDictionaryLibraryHandlerFactory::instance().create(dictionary_id, library_path, library_settings, sample_block_with_nulls, attributes_names);
writeStringBinary("1", out);
}
else if (method == "libDelete")
else if (method == "extDict_libDelete")
{
auto deleted = SharedLibraryHandlerFactory::instance().remove(dictionary_id);
bool deleted = ExternalDictionaryLibraryHandlerFactory::instance().remove(dictionary_id);
/// Do not throw, a warning is ok.
if (!deleted)
@ -210,57 +217,57 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
writeStringBinary("1", out);
}
else if (method == "isModified")
else if (method == "extDict_isModified")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->isModified();
writeStringBinary(std::to_string(res), out);
}
else if (method == "supportsSelectiveLoad")
else if (method == "extDict_supportsSelectiveLoad")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
bool res = library_handler->supportsSelectiveLoad();
writeStringBinary(std::to_string(res), out);
}
else if (method == "loadAll")
else if (method == "extDict_loadAll")
{
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadAll() for dictionary id: {}", dictionary_id);
LOG_DEBUG(log, "Calling extDict_loadAll() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadAll();
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
auto output = FormatFactory::instance().getOutputFormat(FORMAT, out, sample_block, getContext());
writeData(std::move(input), std::move(output));
}
else if (method == "loadIds")
else if (method == "extDict_loadIds")
{
LOG_DEBUG(log, "Getting diciontary ids for dictionary with id: {}", dictionary_id);
String ids_string;
std::vector<uint64_t> ids = parseIdsFromBinary(request.getStream());
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadIds() for dictionary id: {}", dictionary_id);
LOG_DEBUG(log, "Calling extDict_loadIds() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadIds(ids);
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
auto output = FormatFactory::instance().getOutputFormat(FORMAT, out, sample_block, getContext());
writeData(std::move(input), std::move(output));
}
else if (method == "loadKeys")
else if (method == "extDict_loadKeys")
{
if (!params.has("requested_block_sample"))
{
@ -289,18 +296,22 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
Block block;
executor.pull(block);
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
if (!library_handler)
throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Not found dictionary with id: {}", dictionary_id);
const auto & sample_block = library_handler->getSampleBlock();
LOG_DEBUG(log, "Calling loadKeys() for dictionary id: {}", dictionary_id);
LOG_DEBUG(log, "Calling extDict_loadKeys() for dictionary id: {}", dictionary_id);
auto input = library_handler->loadKeys(block.getColumns());
LOG_DEBUG(log, "Started sending result data for dictionary id: {}", dictionary_id);
auto output = FormatFactory::instance().getOutputFormat(FORMAT, out, sample_block, getContext());
writeData(std::move(input), std::move(output));
}
else
{
LOG_WARNING(log, "Unknown library method: '{}'", method);
}
}
catch (...)
{
@ -329,8 +340,14 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe
}
}
LibraryBridgeExistsHandler::LibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("LibraryBridgeExistsHandler"))
{
}
void LibraryExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void LibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
try
{
@ -344,15 +361,12 @@ void LibraryExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServer
}
std::string dictionary_id = params.get("dictionary_id");
auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id);
String res;
if (library_handler)
res = "1";
else
res = "0";
auto library_handler = ExternalDictionaryLibraryHandlerFactory::instance().get(dictionary_id);
String res = library_handler ? "1" : "0";
setResponseDefaultHeaders(response, keep_alive_timeout);
LOG_TRACE(log, "Senging ping response: {} (dictionary id: {})", res, dictionary_id);
LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id);
response.sendBuffer(res.data(), res.size());
}
catch (...)

View File

@ -3,7 +3,7 @@
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Common/logger_useful.h>
#include "SharedLibraryHandler.h"
#include "ExternalDictionaryLibraryHandler.h"
namespace DB
@ -11,23 +11,16 @@ namespace DB
/// Handler for requests to Library Dictionary Source, returns response in RowBinary format.
/// When a library dictionary source is created, it sends libNew request to library bridge (which is started on first
/// When a library dictionary source is created, it sends 'extDict_libNew' request to library bridge (which is started on first
/// request to it, if it was not yet started). On this request a new sharedLibraryHandler is added to a
/// sharedLibraryHandlerFactory by a dictionary uuid. With libNew request come: library_path, library_settings,
/// sharedLibraryHandlerFactory by a dictionary uuid. With 'extDict_libNew' request come: library_path, library_settings,
/// names of dictionary attributes, sample block to parse block of null values, block of null values. Everything is
/// passed in binary format and is urlencoded. When a dictionary is cloned, a new handler is created.
/// Each handler is unique to a dictionary.
class LibraryRequestHandler : public HTTPRequestHandler, WithContext
class LibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
{
public:
LibraryRequestHandler(
size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}
LibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
@ -39,22 +32,16 @@ private:
};
class LibraryExistsHandler : public HTTPRequestHandler, WithContext
class LibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
{
public:
explicit LibraryExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, keep_alive_timeout(keep_alive_timeout_)
, log(&Poco::Logger::get("LibraryRequestHandler"))
{
}
LibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
private:
const size_t keep_alive_timeout;
Poco::Logger * log;
};
}
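To make the routing concrete, a standalone sketch of the 'extDict_libNew' request a dictionary source would send (host, port and all values are placeholders; only the path and parameter names are taken from the factory and handlers above):
#include <iostream>
#include <string>
int main()
{
    const std::string uri = std::string("http://127.0.0.1:<port>/extdict_request")
        + "?method=extDict_libNew"
        + "&dictionary_id=<uuid>"
        + "&library_path=<urlencoded path>"
        + "&library_settings=<urlencoded settings>";
    std::cout << uri << '\n';   /// the bridge dispatches on the path, then on 'method'
}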

View File

@ -1,110 +0,0 @@
#pragma once
#include <cstdint>
#include <string>
#define CLICKHOUSE_DICTIONARY_LIBRARY_API 1
namespace ClickHouseLibrary
{
using CString = const char *;
using ColumnName = CString;
using ColumnNames = ColumnName[];
struct CStrings
{
CString * data = nullptr;
uint64_t size = 0;
};
struct VectorUInt64
{
const uint64_t * data = nullptr;
uint64_t size = 0;
};
struct ColumnsUInt64
{
VectorUInt64 * data = nullptr;
uint64_t size = 0;
};
struct Field
{
const void * data = nullptr;
uint64_t size = 0;
};
struct Row
{
const Field * data = nullptr;
uint64_t size = 0;
};
struct Table
{
const Row * data = nullptr;
uint64_t size = 0;
uint64_t error_code = 0; // 0 = ok; !0 = error, with message in error_string
const char * error_string = nullptr;
};
enum LogLevel
{
FATAL = 1,
CRITICAL,
ERROR,
WARNING,
NOTICE,
INFORMATION,
DEBUG,
TRACE,
};
void log(LogLevel level, CString msg);
extern std::string_view LIBRARY_CREATE_NEW_FUNC_NAME;
extern std::string_view LIBRARY_CLONE_FUNC_NAME;
extern std::string_view LIBRARY_DELETE_FUNC_NAME;
extern std::string_view LIBRARY_DATA_NEW_FUNC_NAME;
extern std::string_view LIBRARY_DATA_DELETE_FUNC_NAME;
extern std::string_view LIBRARY_LOAD_ALL_FUNC_NAME;
extern std::string_view LIBRARY_LOAD_IDS_FUNC_NAME;
extern std::string_view LIBRARY_LOAD_KEYS_FUNC_NAME;
extern std::string_view LIBRARY_IS_MODIFIED_FUNC_NAME;
extern std::string_view LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME;
using LibraryContext = void *;
using LibraryLoggerFunc = void (*)(LogLevel, CString /* message */);
using LibrarySettings = CStrings *;
using LibraryNewFunc = LibraryContext (*)(LibrarySettings, LibraryLoggerFunc);
using LibraryCloneFunc = LibraryContext (*)(LibraryContext);
using LibraryDeleteFunc = void (*)(LibraryContext);
using LibraryData = void *;
using LibraryDataNewFunc = LibraryData (*)(LibraryContext);
using LibraryDataDeleteFunc = void (*)(LibraryContext, LibraryData);
/// Can be safely casted into const Table * with static_cast<const ClickHouseLibrary::Table *>
using RawClickHouseLibraryTable = void *;
using RequestedColumnsNames = CStrings *;
using LibraryLoadAllFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedColumnsNames);
using RequestedIds = const VectorUInt64 *;
using LibraryLoadIdsFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedColumnsNames, RequestedIds);
using RequestedKeys = Table *;
/// There are no requested column names for load keys func
using LibraryLoadKeysFunc = RawClickHouseLibraryTable (*)(LibraryData, LibrarySettings, RequestedKeys);
using LibraryIsModifiedFunc = bool (*)(LibraryContext, LibrarySettings);
using LibrarySupportsSelectiveLoadFunc = bool (*)(LibraryContext, LibrarySettings);
}

View File

@ -1,214 +0,0 @@
#include "SharedLibraryHandler.h"
#include <base/scope_guard.h>
#include <base/bit_cast.h>
#include <base/find_symbols.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int EXTERNAL_LIBRARY_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
SharedLibraryHandler::SharedLibraryHandler(
const std::string & library_path_,
const std::vector<std::string> & library_settings,
const Block & sample_block_,
const std::vector<std::string> & attributes_names_)
: library_path(library_path_)
, sample_block(sample_block_)
, attributes_names(attributes_names_)
{
library = std::make_shared<SharedLibrary>(library_path, RTLD_LAZY);
settings_holder = std::make_shared<CStringsHolder>(CStringsHolder(library_settings));
auto lib_new = library->tryGet<ClickHouseLibrary::LibraryNewFunc>(ClickHouseLibrary::LIBRARY_CREATE_NEW_FUNC_NAME);
if (lib_new)
lib_data = lib_new(&settings_holder->strings, ClickHouseLibrary::log);
else
throw Exception("Method libNew failed", ErrorCodes::EXTERNAL_LIBRARY_ERROR);
}
SharedLibraryHandler::SharedLibraryHandler(const SharedLibraryHandler & other)
: library_path{other.library_path}
, sample_block{other.sample_block}
, attributes_names{other.attributes_names}
, library{other.library}
, settings_holder{other.settings_holder}
{
auto lib_clone = library->tryGet<ClickHouseLibrary::LibraryCloneFunc>(ClickHouseLibrary::LIBRARY_CLONE_FUNC_NAME);
if (lib_clone)
{
lib_data = lib_clone(other.lib_data);
}
else
{
auto lib_new = library->tryGet<ClickHouseLibrary::LibraryNewFunc>(ClickHouseLibrary::LIBRARY_CREATE_NEW_FUNC_NAME);
if (lib_new)
lib_data = lib_new(&settings_holder->strings, ClickHouseLibrary::log);
}
}
SharedLibraryHandler::~SharedLibraryHandler()
{
auto lib_delete = library->tryGet<ClickHouseLibrary::LibraryDeleteFunc>(ClickHouseLibrary::LIBRARY_DELETE_FUNC_NAME);
if (lib_delete)
lib_delete(lib_data);
}
bool SharedLibraryHandler::isModified()
{
auto func_is_modified = library->tryGet<ClickHouseLibrary::LibraryIsModifiedFunc>(ClickHouseLibrary::LIBRARY_IS_MODIFIED_FUNC_NAME);
if (func_is_modified)
return func_is_modified(lib_data, &settings_holder->strings);
return true;
}
bool SharedLibraryHandler::supportsSelectiveLoad()
{
auto func_supports_selective_load = library->tryGet<ClickHouseLibrary::LibrarySupportsSelectiveLoadFunc>(ClickHouseLibrary::LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME);
if (func_supports_selective_load)
return func_supports_selective_load(lib_data, &settings_holder->strings);
return true;
}
Block SharedLibraryHandler::loadAll()
{
auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(attributes_names.size());
ClickHouseLibrary::CStrings columns{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), attributes_names.size()};
for (size_t i = 0; i < attributes_names.size(); ++i)
columns.data[i] = attributes_names[i].c_str();
auto load_all_func = library->get<ClickHouseLibrary::LibraryLoadAllFunc>(ClickHouseLibrary::LIBRARY_LOAD_ALL_FUNC_NAME);
auto data_new_func = library->get<ClickHouseLibrary::LibraryDataNewFunc>(ClickHouseLibrary::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ClickHouseLibrary::LibraryDataDeleteFunc>(ClickHouseLibrary::LIBRARY_DATA_DELETE_FUNC_NAME);
ClickHouseLibrary::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ClickHouseLibrary::RawClickHouseLibraryTable data = load_all_func(data_ptr, &settings_holder->strings, &columns);
return dataToBlock(data);
}
Block SharedLibraryHandler::loadIds(const std::vector<uint64_t> & ids)
{
const ClickHouseLibrary::VectorUInt64 ids_data{bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()};
auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(attributes_names.size());
ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), attributes_names.size()};
auto load_ids_func = library->get<ClickHouseLibrary::LibraryLoadIdsFunc>(ClickHouseLibrary::LIBRARY_LOAD_IDS_FUNC_NAME);
auto data_new_func = library->get<ClickHouseLibrary::LibraryDataNewFunc>(ClickHouseLibrary::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ClickHouseLibrary::LibraryDataDeleteFunc>(ClickHouseLibrary::LIBRARY_DATA_DELETE_FUNC_NAME);
ClickHouseLibrary::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ClickHouseLibrary::RawClickHouseLibraryTable data = load_ids_func(data_ptr, &settings_holder->strings, &columns_pass, &ids_data);
return dataToBlock(data);
}
Block SharedLibraryHandler::loadKeys(const Columns & key_columns)
{
auto holder = std::make_unique<ClickHouseLibrary::Row[]>(key_columns.size());
std::vector<std::unique_ptr<ClickHouseLibrary::Field[]>> column_data_holders;
for (size_t i = 0; i < key_columns.size(); ++i)
{
auto cell_holder = std::make_unique<ClickHouseLibrary::Field[]>(key_columns[i]->size());
for (size_t j = 0; j < key_columns[i]->size(); ++j)
{
auto data_ref = key_columns[i]->getDataAt(j);
cell_holder[j] = ClickHouseLibrary::Field{
.data = static_cast<const void *>(data_ref.data),
.size = data_ref.size};
}
holder[i] = ClickHouseLibrary::Row{
.data = static_cast<ClickHouseLibrary::Field *>(cell_holder.get()),
.size = key_columns[i]->size()};
column_data_holders.push_back(std::move(cell_holder));
}
ClickHouseLibrary::Table request_cols{
.data = static_cast<ClickHouseLibrary::Row *>(holder.get()),
.size = key_columns.size()};
auto load_keys_func = library->get<ClickHouseLibrary::LibraryLoadKeysFunc>(ClickHouseLibrary::LIBRARY_LOAD_KEYS_FUNC_NAME);
auto data_new_func = library->get<ClickHouseLibrary::LibraryDataNewFunc>(ClickHouseLibrary::LIBRARY_DATA_NEW_FUNC_NAME);
auto data_delete_func = library->get<ClickHouseLibrary::LibraryDataDeleteFunc>(ClickHouseLibrary::LIBRARY_DATA_DELETE_FUNC_NAME);
ClickHouseLibrary::LibraryData data_ptr = data_new_func(lib_data);
SCOPE_EXIT(data_delete_func(lib_data, data_ptr));
ClickHouseLibrary::RawClickHouseLibraryTable data = load_keys_func(data_ptr, &settings_holder->strings, &request_cols);
return dataToBlock(data);
}
Block SharedLibraryHandler::dataToBlock(const ClickHouseLibrary::RawClickHouseLibraryTable data)
{
if (!data)
throw Exception("LibraryDictionarySource: No data returned", ErrorCodes::EXTERNAL_LIBRARY_ERROR);
const auto * columns_received = static_cast<const ClickHouseLibrary::Table *>(data);
if (columns_received->error_code)
throw Exception(
"LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " + (columns_received->error_string ? columns_received->error_string : ""),
ErrorCodes::EXTERNAL_LIBRARY_ERROR);
MutableColumns columns = sample_block.cloneEmptyColumns();
for (size_t col_n = 0; col_n < columns_received->size; ++col_n)
{
if (columns.size() != columns_received->data[col_n].size)
throw Exception(
"LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) + ", must be " + std::to_string(columns.size()),
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n)
{
const auto & field = columns_received->data[col_n].data[row_n];
if (!field.data)
{
/// sample_block contains null_value (from config) inside corresponding column
const auto & col = sample_block.getByPosition(row_n);
columns[row_n]->insertFrom(*(col.column), 0);
}
else
{
const auto & size = field.size;
columns[row_n]->insertData(static_cast<const char *>(field.data), size);
}
}
}
return sample_block.cloneWithColumns(std::move(columns));
}
}

View File

@ -1,62 +0,0 @@
#include "SharedLibraryHandlerFactory.h"
namespace DB
{
SharedLibraryHandlerPtr SharedLibraryHandlerFactory::get(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
auto library_handler = library_handlers.find(dictionary_id);
if (library_handler != library_handlers.end())
return library_handler->second;
return nullptr;
}
void SharedLibraryHandlerFactory::create(
const std::string & dictionary_id,
const std::string & library_path,
const std::vector<std::string> & library_settings,
const Block & sample_block,
const std::vector<std::string> & attributes_names)
{
std::lock_guard lock(mutex);
if (!library_handlers.contains(dictionary_id))
library_handlers.emplace(std::make_pair(dictionary_id, std::make_shared<SharedLibraryHandler>(library_path, library_settings, sample_block, attributes_names)));
else
LOG_WARNING(&Poco::Logger::get("SharedLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
}
bool SharedLibraryHandlerFactory::clone(const std::string & from_dictionary_id, const std::string & to_dictionary_id)
{
std::lock_guard lock(mutex);
auto from_library_handler = library_handlers.find(from_dictionary_id);
if (from_library_handler == library_handlers.end())
return false;
/// libClone method will be called in copy constructor
library_handlers[to_dictionary_id] = std::make_shared<SharedLibraryHandler>(*from_library_handler->second);
return true;
}
bool SharedLibraryHandlerFactory::remove(const std::string & dictionary_id)
{
std::lock_guard lock(mutex);
/// libDelete is called in destructor.
return library_handlers.erase(dictionary_id);
}
SharedLibraryHandlerFactory & SharedLibraryHandlerFactory::instance()
{
static SharedLibraryHandlerFactory ret;
return ret;
}
}

View File

@ -1,3 +1,2 @@
int mainEntryClickHouseLibraryBridge(int argc, char ** argv);
int main(int argc_, char ** argv_) { return mainEntryClickHouseLibraryBridge(argc_, argv_); }

View File

@ -323,12 +323,28 @@ void LocalServer::setupUsers()
auto & access_control = global_context->getAccessControl();
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
if (config().has("users_config") || config().has("config-file") || fs::exists("config.xml"))
if (config().has("config-file") || fs::exists("config.xml"))
{
const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml"));
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
users_config = loaded_config.configuration;
String config_path = config().getString("config-file", "");
bool has_user_directories = config().has("user_directories");
const auto config_dir = fs::path{config_path}.remove_filename().string();
String users_config_path = config().getString("users_config", "");
if (users_config_path.empty() && has_user_directories)
{
users_config_path = config().getString("user_directories.users_xml.path");
if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
users_config_path = fs::path(config_dir) / users_config_path;
}
if (users_config_path.empty())
users_config = getConfigurationFromXMLString(minimal_default_user_xml);
else
{
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
users_config = loaded_config.configuration;
}
}
else
users_config = getConfigurationFromXMLString(minimal_default_user_xml);
@ -338,7 +354,6 @@ void LocalServer::setupUsers()
throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG);
}
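The path resolution added above can be shown standalone (paths are placeholders): a relative user_directories.users_xml.path is resolved against the directory of the main config file.
#include <filesystem>
#include <iostream>
namespace fs = std::filesystem;
int main()
{
    const fs::path config_file = "/etc/clickhouse-server/config.xml";  /// placeholder
    const fs::path users_xml = "users.xml";  /// from user_directories.users_xml.path
    const fs::path resolved = users_xml.is_relative()
        ? config_file.parent_path() / users_xml
        : users_xml;
    std::cout << resolved << '\n';  /// "/etc/clickhouse-server/users.xml"
}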
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config());

View File

@ -2,17 +2,17 @@ include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
ColumnInfoHandler.cpp
getIdentifierQuote.cpp
HandlerFactory.cpp
IdentifierQuoteHandler.cpp
MainHandler.cpp
ODBCBlockInputStream.cpp
ODBCBlockOutputStream.cpp
ODBCBridge.cpp
ODBCHandlerFactory.cpp
PingHandler.cpp
SchemaAllowedHandler.cpp
validateODBCConnectionString.cpp
getIdentifierQuote.cpp
odbc-bridge.cpp
validateODBCConnectionString.cpp
)
if (OS_LINUX)

View File

@ -1,6 +1,5 @@
#include "ODBCBridge.h"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseODBCBridge(int argc, char ** argv)
{
DB::ODBCBridge app;
@ -15,3 +14,18 @@ int mainEntryClickHouseODBCBridge(int argc, char ** argv)
return code ? code : 1;
}
}
namespace DB
{
std::string ODBCBridge::bridgeName() const
{
return "ODBCBridge";
}
ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const
{
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
}
}

View File

@ -3,7 +3,7 @@
#include <Interpreters/Context.h>
#include <Poco/Logger.h>
#include <Bridge/IBridge.h>
#include "HandlerFactory.h"
#include "ODBCHandlerFactory.h"
namespace DB
@ -13,14 +13,7 @@ class ODBCBridge : public IBridge
{
protected:
std::string bridgeName() const override
{
return "ODBCBridge";
}
HandlerFactoryPtr getHandlerFactoryPtr(ContextPtr context) const override
{
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
}
std::string bridgeName() const override;
HandlerFactoryPtr getHandlerFactoryPtr(ContextPtr context) const override;
};
}

View File

@ -1,4 +1,4 @@
#include "HandlerFactory.h"
#include "ODBCHandlerFactory.h"
#include "PingHandler.h"
#include "ColumnInfoHandler.h"
#include <Common/config.h>
@ -9,6 +9,14 @@
namespace DB
{
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
}
std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandler(const HTTPServerRequest & request)
{
Poco::URI uri{request.getURI()};

View File

@ -17,13 +17,7 @@ namespace DB
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
}
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_);
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;

View File

@ -75,6 +75,9 @@
*
{
box-sizing: border-box;
/* For iPad */
margin: 0;
border-radius: 0;
}
html, body
@ -275,13 +278,23 @@
display: none;
}
/* When mouse pointer is over table cell, will display full text (with wrap) instead of cut. */
td.left:hover
/* When the mouse pointer is over a table cell, the full text will be displayed (with wrap) instead of cut.
* We also keep it for some time on mouseout for a "hysteresis" effect.
*/
td.left:hover, .td-hover-hysteresis
{
white-space: pre-wrap;
max-width: none;
}
.td-selected
{
white-space: pre-wrap;
max-width: none;
background-color: var(--table-hover-color);
border: 2px solid var(--border-color);
}
td.transposed
{
max-width: none;
@ -295,6 +308,14 @@
vertical-align: middle;
}
.row-number
{
width: 1%;
text-align: right;
background-color: var(--table-header-color);
color: var(--misc-text-color);
}
div.empty-result
{
opacity: 10%;
@ -501,20 +522,27 @@
const server_address = document.getElementById('url').value;
const url = server_address +
var url = server_address +
(server_address.indexOf('?') >= 0 ? '&' : '?') +
/// Ask server to allow cross-domain requests.
'add_http_cors_header=1' +
'&user=' + encodeURIComponent(user) +
'&password=' + encodeURIComponent(password) +
'&default_format=JSONCompact' +
/// Safety settings to prevent results that browser cannot display.
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
// If play.html is opened locally, append the username and password to the URL parameters to avoid a CORS issue.
if (document.location.href.startsWith("file://")) {
url += '&user=' + encodeURIComponent(user) +
'&password=' + encodeURIComponent(password)
}
const xhr = new XMLHttpRequest;
xhr.open('POST', url, true);
// If play.html is opened normally, use Basic auth to prevent the username and password from being exposed in URL parameters
if (!document.location.href.startsWith("file://")) {
xhr.setRequestHeader("Authorization", "Basic " + btoa(user+":"+password));
}
xhr.onreadystatechange = function()
{
if (posted_request_num != request_num) {
@ -633,10 +661,9 @@
clearElement('chart');
clearElement('data-unparsed');
clearElement('error');
clearElement('hourglass');
document.getElementById('check-mark').innerText = '';
document.getElementById('hourglass').innerText = '';
document.getElementById('check-mark').display = 'none';
document.getElementById('hourglass').display = 'none';
document.getElementById('stats').innerText = '';
document.getElementById('logo-container').style.display = 'block';
}
@ -784,7 +811,17 @@
return;
}
const should_display_row_numbers = response.data.length > 3;
let thead = document.createElement('thead');
if (should_display_row_numbers) {
let th = document.createElement('th');
th.className = 'row-number';
th.appendChild(document.createTextNode('№'));
thead.appendChild(th);
}
for (let idx in response.meta) {
let th = document.createElement('th');
const name = document.createTextNode(response.meta[idx].name);
@ -794,7 +831,9 @@
/// To prevent hanging the browser, limit the number of cells in a table.
/// It's important to have the limit on number of cells, not just rows, because tables may be wide or narrow.
/// Also we permit rendering of more records but only if elapsed time is not large.
const max_rows = 10000 / response.meta.length;
const max_render_ms = 200;
let row_num = 0;
const column_is_number = response.meta.map(elem => !!elem.type.match(/^(Nullable\()?(U?Int|Decimal|Float)/));
@ -810,18 +849,35 @@
column_need_render_bars: column_need_render_bars,
};
const start_time = performance.now();
let tbody = document.createElement('tbody');
for (let row_idx in response.data) {
let tr = document.createElement('tr');
if (should_display_row_numbers) {
let td = document.createElement('td');
td.className = 'row-number';
td.appendChild(document.createTextNode(1 + +row_idx));
tr.appendChild(td);
}
for (let col_idx in response.data[row_idx]) {
let cell = response.data[row_idx][col_idx];
const td = renderCell(cell, col_idx, settings);
td.onclick = () => { td.classList.add('td-selected') };
td.onmouseenter = () => {
td.classList.add('td-hover-hysteresis');
td.onmouseleave = () => {
setTimeout(() => { td && td.classList.remove('td-hover-hysteresis') }, 1000);
};
};
tr.appendChild(td);
}
tbody.appendChild(tr);
++row_num;
if (row_num >= max_rows) {
if (row_num >= max_rows && performance.now() - start_time >= max_render_ms) {
break;
}
}

View File

@ -49,6 +49,16 @@ public:
return nested_func->getReturnType();
}
const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override
{
return nested_func->getBaseAggregateFunctionWithSameStateRepresentation();
}
DataTypePtr getNormalizedStateType() const override
{
return nested_func->getNormalizedStateType();
}
bool isVersioned() const override
{
return nested_func->isVersioned();

View File

@ -5,9 +5,11 @@
#include <array>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Common/assert_cast.h>
#include <Common/config.h>
@ -102,6 +104,19 @@ public:
}
}
bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override
{
return this->getName() == rhs.getName();
}
DataTypePtr getNormalizedStateType() const override
{
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
}
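/// Illustration: count() and the not-null count(x) variant both normalize to
/// AggregateFunction(count), so their states stay interchangeable for -Merge.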
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).count += data(rhs).count;
@ -240,6 +255,19 @@ public:
}
}
bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override
{
return this->getName() == rhs.getName();
}
DataTypePtr getNormalizedStateType() const override
{
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
data(place).count += data(rhs).count;

View File

@ -152,8 +152,8 @@ template <typename Data>
class AggregateFunctionDistinct : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
{
private:
static constexpr auto prefix_size = sizeof(Data);
AggregateFunctionPtr nested_func;
size_t prefix_size;
size_t arguments_num;
AggregateDataPtr getNestedPlace(AggregateDataPtr __restrict place) const noexcept
@ -170,7 +170,11 @@ public:
AggregateFunctionDistinct(AggregateFunctionPtr nested_func_, const DataTypes & arguments, const Array & params_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct>(arguments, params_)
, nested_func(nested_func_)
, arguments_num(arguments.size()) {}
, arguments_num(arguments.size())
{
size_t nested_size = nested_func->alignOfData();
prefix_size = (sizeof(Data) + nested_size - 1) / nested_size * nested_size;
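/// Rounds sizeof(Data) up to the nested state's alignment: e.g. with
/// sizeof(Data) == 24 and alignOfData() == 16, (24 + 15) / 16 * 16 == 32,
/// keeping the nested state returned by getNestedPlace() correctly aligned.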
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{

View File

@ -56,6 +56,16 @@ public:
return nested_func->getReturnType();
}
const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override
{
return nested_func->getBaseAggregateFunctionWithSameStateRepresentation();
}
DataTypePtr getNormalizedStateType() const override
{
return nested_func->getNormalizedStateType();
}
bool isVersioned() const override
{
return nested_func->isVersioned();

View File

@ -23,14 +23,20 @@ public:
DataTypes transformArguments(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for aggregate function with {} suffix",
getName());
const DataTypePtr & argument = arguments[0];
const DataTypeAggregateFunction * function = typeid_cast<const DataTypeAggregateFunction *>(argument.get());
if (!function)
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix"
+ " must be AggregateFunction(...)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument for aggregate function with {} suffix. It must be AggregateFunction(...)",
argument->getName(),
getName());
return function->getArgumentsDataTypes();
}
@ -45,13 +51,21 @@ public:
const DataTypeAggregateFunction * function = typeid_cast<const DataTypeAggregateFunction *>(argument.get());
if (!function)
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix"
+ " must be AggregateFunction(...)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument for aggregate function with {} suffix. It must be AggregateFunction(...)",
argument->getName(),
getName());
if (nested_function->getName() != function->getFunctionName())
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function with " + getName() + " suffix"
+ ", because it corresponds to different aggregate function: " + function->getFunctionName() + " instead of " + nested_function->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!nested_function->haveSameStateRepresentation(*function->getFunction()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument for aggregate function with {} suffix. because it corresponds to different aggregate "
"function: {} instead of {}",
argument->getName(),
getName(),
function->getFunctionName(),
nested_function->getName());
return std::make_shared<AggregateFunctionMerge>(nested_function, argument, params);
}
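A toy model of the relaxed compatibility check above (simplified names, not the ClickHouse API): -Merge now accepts any state whose normalized representation matches, instead of requiring an exact function-name match.
#include <cassert>
#include <string>
/// Toy stand-in for getNormalizedStateType(): both count variants normalize
/// to the same state description.
std::string normalizedState(const std::string & name)
{
    if (name == "count" || name == "countNotNull")
        return "AggregateFunction(count)";
    return "AggregateFunction(" + name + ")";
}
bool haveSameStateRepresentation(const std::string & lhs, const std::string & rhs)
{
    return normalizedState(lhs) == normalizedState(rhs);
}
int main()
{
    assert(haveSameStateRepresentation("count", "countNotNull"));  /// ok to merge
    assert(!haveSameStateRepresentation("count", "sum"));          /// rejected
}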

View File

@ -50,6 +50,11 @@ public:
return nested_func->getReturnType();
}
const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override
{
return nested_func->getBaseAggregateFunctionWithSameStateRepresentation();
}
bool isVersioned() const override
{
return nested_func->isVersioned();

View File

@ -1,6 +1,7 @@
#pragma once
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
/// These must be exposed in header for the purpose of dynamic compilation.
#include <AggregateFunctions/QuantileReservoirSampler.h>
@ -20,9 +21,11 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/assert_cast.h>
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <type_traits>
@ -61,10 +64,9 @@ template <
typename FloatReturnType,
/// If true, the function will accept multiple parameters with quantile levels
/// and return an Array filled with many values of those quantiles.
bool returns_many
>
class AggregateFunctionQuantile final : public IAggregateFunctionDataHelper<Data,
AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>
bool returns_many>
class AggregateFunctionQuantile final
: public IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>
{
private:
using ColVecType = ColumnVectorOrDecimal<Value>;
@ -81,11 +83,14 @@ private:
public:
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(argument_types_, params)
, levels(params, returns_many), level(levels.levels[0]), argument_type(this->argument_types[0])
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
argument_types_, params)
, levels(params, returns_many)
, level(levels.levels[0])
, argument_type(this->argument_types[0])
{
if (!returns_many && levels.size() > 1)
throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one parameter or less", getName());
}
String getName() const override { return Name::name; }
@ -105,9 +110,22 @@ public:
return res;
}
bool haveSameStateRepresentation(const IAggregateFunction & rhs) const override
bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override
{
return getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs);
return GatherFunctionQuantileData::toFusedNameOrSelf(getName()) == GatherFunctionQuantileData::toFusedNameOrSelf(rhs.getName())
&& this->haveEqualArgumentTypes(rhs);
}
DataTypePtr getNormalizedStateType() const override
{
/// Return normalized state type: quantiles*(1)(...)
Array params{1};
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), this->argument_types, params, properties),
this->argument_types,
params);
}
bool allocatesMemoryInArena() const override { return false; }
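
The two overrides above hinge on GatherFunctionQuantileData::toFusedNameOrSelf: variants such as quantile and quantiles share one binary state, so names are mapped to a common "fused" name before comparison, and getNormalizedStateType rebuilds the state type with a single placeholder parameter ({1}) because the levels affect only finalization. A minimal standalone sketch of the name-fusion step (the map entries here are illustrative assumptions; the real table lives in Interpreters/GatherFunctionQuantileVisitor.h):

#include <string>
#include <unordered_map>

/// Illustrative sketch: map each single-level quantile function to its
/// multi-level counterpart, since both serialize the same state.
std::string toFusedNameOrSelf(const std::string & name)
{
    static const std::unordered_map<std::string, std::string> fuse_map
    {
        {"quantile", "quantiles"},            /// assumed entry
        {"quantileExact", "quantilesExact"},  /// assumed entry
    };
    auto it = fuse_map.find(name);
    return it != fuse_map.end() ? it->second : name;
}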
@ -124,9 +142,7 @@ public:
}
if constexpr (has_second_arg)
this->data(place).add(
value,
columns[1]->getUInt(row_num));
this->data(place).add(value, columns[1]->getUInt(row_num));
else
this->data(place).add(value);
}
@ -149,7 +165,6 @@ public:
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
/// const_cast is required because some data structures apply finalization (like sorting) to obtain a result.
auto & data = this->data(place);
if constexpr (returns_many)
@ -195,7 +210,11 @@ public:
{
assertBinary(Name::name, types);
if (!isUnsignedInteger(types[1]))
throw Exception("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + types[1]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument (weight) for function {} must be unsigned integer, but it has type {}",
Name::name,
types[1]->getName());
}
else
assertUnary(Name::name, types);

View File

@ -163,7 +163,7 @@ public:
this->data(place).deserialize(buf);
}
bool haveSameStateRepresentation(const IAggregateFunction & rhs) const override
bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override
{
return this->getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs);
}

View File

@ -194,7 +194,7 @@ public:
DataTypePtr getReturnType() const override { return data_type; }
bool haveSameStateRepresentation(const IAggregateFunction & rhs) const override
bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const override
{
return this->getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs);
}

View File

@ -37,6 +37,11 @@ public:
return getStateType();
}
const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const override
{
return nested_func->getBaseAggregateFunctionWithSameStateRepresentation();
}
DataTypePtr getStateType() const override
{
return nested_func->getStateType();

View File

@ -59,6 +59,13 @@ bool IAggregateFunction::haveEqualArgumentTypes(const IAggregateFunction & rhs)
}
bool IAggregateFunction::haveSameStateRepresentation(const IAggregateFunction & rhs) const
{
const auto & lhs_base = getBaseAggregateFunctionWithSameStateRepresentation();
const auto & rhs_base = rhs.getBaseAggregateFunctionWithSameStateRepresentation();
return lhs_base.haveSameStateRepresentationImpl(rhs_base);
}
bool IAggregateFunction::haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const
{
bool res = getName() == rhs.getName()
&& parameters == rhs.parameters

View File

@ -73,13 +73,19 @@ public:
/// Get the data type of internal state. By default it is AggregateFunction(name(params), argument_types...).
virtual DataTypePtr getStateType() const;
/// Same as the above, but normalizes state types so that variants with the same binary representation use the same type.
virtual DataTypePtr getNormalizedStateType() const { return getStateType(); }
/// Returns true if two aggregate functions have the same state representation in memory and the same serialization,
/// so the state of one aggregate function can be safely used with another.
/// Examples:
/// - quantile(x), quantile(a)(x), quantile(b)(x) - the parameter doesn't affect the state and is used only for finalization
/// - foo(x) and fooIf(x) - the If combinator doesn't affect the state
/// By default returns true only if the functions have exactly the same names, combinators, and parameters.
virtual bool haveSameStateRepresentation(const IAggregateFunction & rhs) const;
bool haveSameStateRepresentation(const IAggregateFunction & rhs) const;
virtual bool haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const;
virtual const IAggregateFunction & getBaseAggregateFunctionWithSameStateRepresentation() const { return *this; }
bool haveEqualArgumentTypes(const IAggregateFunction & rhs) const;
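
Taken together, the entry point is now non-virtual and the comparison is a two-step template method: both sides are first reduced to their state-preserving base function, then compared. A compilable toy model of the pattern (Sum and If are made-up stand-ins, and getBase abbreviates getBaseAggregateFunctionWithSameStateRepresentation; this is not the real class hierarchy):

#include <iostream>
#include <string>

struct IAggregateFunctionModel
{
    virtual ~IAggregateFunctionModel() = default;
    virtual std::string getName() const = 0;

    /// Non-virtual entry point: normalize both sides, then compare.
    bool haveSameStateRepresentation(const IAggregateFunctionModel & rhs) const
    {
        return getBase().haveSameStateRepresentationImpl(rhs.getBase());
    }

    /// Default: functions must match exactly.
    virtual bool haveSameStateRepresentationImpl(const IAggregateFunctionModel & rhs) const
    {
        return getName() == rhs.getName();
    }

    /// Combinators that keep the nested state unchanged override this.
    virtual const IAggregateFunctionModel & getBase() const { return *this; }
};

struct Sum : IAggregateFunctionModel
{
    std::string getName() const override { return "sum"; }
};

/// Models an -If combinator: its state is exactly the nested function's state.
struct If : IAggregateFunctionModel
{
    const IAggregateFunctionModel & nested;
    explicit If(const IAggregateFunctionModel & nested_) : nested(nested_) { }
    std::string getName() const override { return nested.getName() + "If"; }
    const IAggregateFunctionModel & getBase() const override { return nested.getBase(); }
};

int main()
{
    Sum sum;
    If sum_if(sum);
    std::cout << std::boolalpha << sum.haveSameStateRepresentation(sum_if) << '\n'; /// true
}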

View File

@ -1,4 +1,4 @@
#include "LibraryBridgeHelper.h"
#include "ExternalDictionaryLibraryBridgeHelper.h"
#include <Formats/formatBlock.h>
#include <Dictionaries/DictionarySourceHelpers.h>
@ -26,26 +26,43 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
LibraryBridgeHelper::LibraryBridgeHelper(
ExternalDictionaryLibraryBridgeHelper::ExternalDictionaryLibraryBridgeHelper(
ContextPtr context_,
const Block & sample_block_,
const Field & dictionary_id_,
const LibraryInitData & library_data_)
: IBridgeHelper(context_->getGlobalContext())
, log(&Poco::Logger::get("LibraryBridgeHelper"))
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeHelper"))
, sample_block(sample_block_)
, config(context_->getConfigRef())
, http_timeout(context_->getGlobalContext()->getSettingsRef().http_receive_timeout.value)
, library_data(library_data_)
, dictionary_id(dictionary_id_)
, bridge_host(config.getString("library_bridge.host", DEFAULT_HOST))
, bridge_port(config.getUInt("library_bridge.port", DEFAULT_PORT))
, http_timeouts(ConnectionTimeouts::getHTTPTimeouts(context_))
{
bridge_port = config.getUInt("library_bridge.port", DEFAULT_PORT);
bridge_host = config.getString("library_bridge.host", DEFAULT_HOST);
}
Poco::URI LibraryBridgeHelper::createRequestURI(const String & method) const
Poco::URI ExternalDictionaryLibraryBridgeHelper::getPingURI() const
{
auto uri = createBaseURI();
uri.setPath(PING_HANDLER);
uri.addQueryParameter("dictionary_id", toString(dictionary_id));
return uri;
}
Poco::URI ExternalDictionaryLibraryBridgeHelper::getMainURI() const
{
auto uri = createBaseURI();
uri.setPath(MAIN_HANDLER);
return uri;
}
Poco::URI ExternalDictionaryLibraryBridgeHelper::createRequestURI(const String & method) const
{
auto uri = getMainURI();
uri.addQueryParameter("dictionary_id", toString(dictionary_id));
@ -54,7 +71,7 @@ Poco::URI LibraryBridgeHelper::createRequestURI(const String & method) const
}
Poco::URI LibraryBridgeHelper::createBaseURI() const
Poco::URI ExternalDictionaryLibraryBridgeHelper::createBaseURI() const
{
Poco::URI uri;
uri.setHost(bridge_host);
@ -64,18 +81,18 @@ Poco::URI LibraryBridgeHelper::createBaseURI() const
}
void LibraryBridgeHelper::startBridge(std::unique_ptr<ShellCommand> cmd) const
void ExternalDictionaryLibraryBridgeHelper::startBridge(std::unique_ptr<ShellCommand> cmd) const
{
getContext()->addBridgeCommand(std::move(cmd));
}
bool LibraryBridgeHelper::bridgeHandShake()
bool ExternalDictionaryLibraryBridgeHelper::bridgeHandShake()
{
String result;
try
{
ReadWriteBufferFromHTTP buf(createRequestURI(PING), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
ReadWriteBufferFromHTTP buf(getPingURI(), Poco::Net::HTTPRequest::HTTP_GET, {}, http_timeouts, credentials);
readString(result, buf);
}
catch (...)
@ -91,12 +108,12 @@ bool LibraryBridgeHelper::bridgeHandShake()
* 2. Bridge crashed or restarted for some reason while the server did not.
**/
if (result.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check bridge and server have the same version.", result);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {}. Check that bridge and server have the same version.", result);
UInt8 dictionary_id_exists;
auto parsed = tryParse<UInt8>(dictionary_id_exists, result);
if (!parsed || (dictionary_id_exists != 0 && dictionary_id_exists != 1))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {} ({}). Check bridge and server have the same version.",
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected message from library bridge: {} ({}). Check that bridge and server have the same version.",
result, parsed ? toString(dictionary_id_exists) : "failed to parse");
LOG_TRACE(log, "dictionary_id: {}, dictionary_id_exists on bridge side: {}, library confirmed to be initialized on server side: {}",
@ -113,7 +130,7 @@ bool LibraryBridgeHelper::bridgeHandShake()
bool reinitialized = false;
try
{
auto uri = createRequestURI(LIB_NEW_METHOD);
auto uri = createRequestURI(EXT_DICT_LIB_NEW_METHOD);
reinitialized = executeRequest(uri, getInitLibraryCallback());
}
catch (...)
@ -131,11 +148,11 @@ bool LibraryBridgeHelper::bridgeHandShake()
}
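
The handshake contract above is deliberately narrow: the ping endpoint must answer with exactly one character, '0' or '1', saying whether the bridge already holds this dictionary_id; anything else indicates a bridge/server version mismatch. A minimal parser for that answer (a hypothetical helper for illustration, not part of the bridge code):

#include <optional>
#include <string>

/// Returns whether the bridge already knows the dictionary,
/// or nullopt on a malformed answer (e.g. version mismatch).
std::optional<bool> parsePingAnswer(const std::string & result)
{
    if (result.size() != 1 || (result[0] != '0' && result[0] != '1'))
        return std::nullopt;
    return result[0] == '1';
}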
ReadWriteBufferFromHTTP::OutStreamCallback LibraryBridgeHelper::getInitLibraryCallback() const
ReadWriteBufferFromHTTP::OutStreamCallback ExternalDictionaryLibraryBridgeHelper::getInitLibraryCallback() const
{
/// Sample block must contain null values
WriteBufferFromOwnString out;
auto output_format = getContext()->getOutputFormat(LibraryBridgeHelper::DEFAULT_FORMAT, out, sample_block);
auto output_format = getContext()->getOutputFormat(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, out, sample_block);
formatBlock(output_format, sample_block);
auto block_string = out.str();
@ -150,19 +167,19 @@ ReadWriteBufferFromHTTP::OutStreamCallback LibraryBridgeHelper::getInitLibraryCa
}
bool LibraryBridgeHelper::initLibrary()
bool ExternalDictionaryLibraryBridgeHelper::initLibrary()
{
startBridgeSync();
auto uri = createRequestURI(LIB_NEW_METHOD);
auto uri = createRequestURI(EXT_DICT_LIB_NEW_METHOD);
library_initialized = executeRequest(uri, getInitLibraryCallback());
return library_initialized;
}
bool LibraryBridgeHelper::cloneLibrary(const Field & other_dictionary_id)
bool ExternalDictionaryLibraryBridgeHelper::cloneLibrary(const Field & other_dictionary_id)
{
startBridgeSync();
auto uri = createRequestURI(LIB_CLONE_METHOD);
auto uri = createRequestURI(EXT_DICT_LIB_CLONE_METHOD);
uri.addQueryParameter("from_dictionary_id", toString(other_dictionary_id));
/// We also pass initialization settings in order to create a library handler
/// in case from_dictionary_id does not exist on the bridge side (possible in case of a bridge crash).
@ -171,70 +188,70 @@ bool LibraryBridgeHelper::cloneLibrary(const Field & other_dictionary_id)
}
bool LibraryBridgeHelper::removeLibrary()
bool ExternalDictionaryLibraryBridgeHelper::removeLibrary()
{
/// Do not force a bridge restart for removeLibrary if the bridge is not running,
/// because in that case it will not have this dictionary id in memory after the restart anyway.
if (bridgeHandShake())
{
auto uri = createRequestURI(LIB_DELETE_METHOD);
auto uri = createRequestURI(EXT_DICT_LIB_DELETE_METHOD);
return executeRequest(uri);
}
return true;
}
bool LibraryBridgeHelper::isModified()
bool ExternalDictionaryLibraryBridgeHelper::isModified()
{
startBridgeSync();
auto uri = createRequestURI(IS_MODIFIED_METHOD);
auto uri = createRequestURI(EXT_DICT_IS_MODIFIED_METHOD);
return executeRequest(uri);
}
bool LibraryBridgeHelper::supportsSelectiveLoad()
bool ExternalDictionaryLibraryBridgeHelper::supportsSelectiveLoad()
{
startBridgeSync();
auto uri = createRequestURI(SUPPORTS_SELECTIVE_LOAD_METHOD);
auto uri = createRequestURI(EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD);
return executeRequest(uri);
}
QueryPipeline LibraryBridgeHelper::loadAll()
QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadAll()
{
startBridgeSync();
auto uri = createRequestURI(LOAD_ALL_METHOD);
auto uri = createRequestURI(EXT_DICT_LOAD_ALL_METHOD);
return QueryPipeline(loadBase(uri));
}
QueryPipeline LibraryBridgeHelper::loadIds(const std::vector<uint64_t> & ids)
QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadIds(const std::vector<uint64_t> & ids)
{
startBridgeSync();
auto uri = createRequestURI(LOAD_IDS_METHOD);
auto uri = createRequestURI(EXT_DICT_LOAD_IDS_METHOD);
uri.addQueryParameter("ids_num", toString(ids.size())); /// Not used parameter, but helpful
auto ids_string = getDictIdsString(ids);
return QueryPipeline(loadBase(uri, [ids_string](std::ostream & os) { os << ids_string; }));
}
QueryPipeline LibraryBridgeHelper::loadKeys(const Block & requested_block)
QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadKeys(const Block & requested_block)
{
startBridgeSync();
auto uri = createRequestURI(LOAD_KEYS_METHOD);
auto uri = createRequestURI(EXT_DICT_LOAD_KEYS_METHOD);
/// Sample block used to parse the block from the callback
uri.addQueryParameter("requested_block_sample", requested_block.getNamesAndTypesList().toString());
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [requested_block, this](std::ostream & os)
{
WriteBufferFromOStream out_buffer(os);
auto output_format = getContext()->getOutputFormat(LibraryBridgeHelper::DEFAULT_FORMAT, out_buffer, requested_block.cloneEmpty());
auto output_format = getContext()->getOutputFormat(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, out_buffer, requested_block.cloneEmpty());
formatBlock(output_format, requested_block);
};
return QueryPipeline(loadBase(uri, out_stream_callback));
}
bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const
bool ExternalDictionaryLibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback) const
{
ReadWriteBufferFromHTTP buf(
uri,
@ -248,7 +265,7 @@ bool LibraryBridgeHelper::executeRequest(const Poco::URI & uri, ReadWriteBufferF
}
QueryPipeline LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback)
QueryPipeline ExternalDictionaryLibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback)
{
auto read_buf_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
uri,
@ -261,13 +278,13 @@ QueryPipeline LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBuff
getContext()->getReadSettings(),
ReadWriteBufferFromHTTP::HTTPHeaderEntries{});
auto source = FormatFactory::instance().getInput(LibraryBridgeHelper::DEFAULT_FORMAT, *read_buf_ptr, sample_block, getContext(), DEFAULT_BLOCK_SIZE);
auto source = FormatFactory::instance().getInput(ExternalDictionaryLibraryBridgeHelper::DEFAULT_FORMAT, *read_buf_ptr, sample_block, getContext(), DEFAULT_BLOCK_SIZE);
source->addBuffer(std::move(read_buf_ptr));
return QueryPipeline(std::move(source));
}
String LibraryBridgeHelper::getDictIdsString(const std::vector<UInt64> & ids)
String ExternalDictionaryLibraryBridgeHelper::getDictIdsString(const std::vector<UInt64> & ids)
{
WriteBufferFromOwnString out;
writeVectorBinary(ids, out);

View File

@ -14,7 +14,7 @@ namespace DB
class Pipe;
class LibraryBridgeHelper : public IBridgeHelper
class ExternalDictionaryLibraryBridgeHelper : public IBridgeHelper
{
public:
@ -26,8 +26,10 @@ public:
};
static constexpr inline size_t DEFAULT_PORT = 9012;
static constexpr inline auto PING_HANDLER = "/extdict_ping";
static constexpr inline auto MAIN_HANDLER = "/extdict_request";
LibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_);
ExternalDictionaryLibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_);
bool initLibrary();
@ -45,13 +47,13 @@ public:
QueryPipeline loadKeys(const Block & requested_block);
QueryPipeline loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {}) const;
LibraryInitData getLibraryData() const { return library_data; }
protected:
Poco::URI getPingURI() const override;
Poco::URI getMainURI() const override;
bool bridgeHandShake() override;
void startBridge(std::unique_ptr<ShellCommand> cmd) const override;
@ -74,18 +76,21 @@ protected:
Poco::URI createBaseURI() const override;
QueryPipeline loadBase(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {});
bool executeRequest(const Poco::URI & uri, ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = {}) const;
ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const;
private:
static constexpr inline auto LIB_NEW_METHOD = "libNew";
static constexpr inline auto LIB_CLONE_METHOD = "libClone";
static constexpr inline auto LIB_DELETE_METHOD = "libDelete";
static constexpr inline auto LOAD_ALL_METHOD = "loadAll";
static constexpr inline auto LOAD_IDS_METHOD = "loadIds";
static constexpr inline auto LOAD_KEYS_METHOD = "loadKeys";
static constexpr inline auto IS_MODIFIED_METHOD = "isModified";
static constexpr inline auto PING = "ping";
static constexpr inline auto SUPPORTS_SELECTIVE_LOAD_METHOD = "supportsSelectiveLoad";
static constexpr inline auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew";
static constexpr inline auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone";
static constexpr inline auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete";
static constexpr inline auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll";
static constexpr inline auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds";
static constexpr inline auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys";
static constexpr inline auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified";
static constexpr inline auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad";
Poco::URI createRequestURI(const String & method) const;
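
With the handlers and method names above, a full request URI is composed from the base host/port, the main handler path, the dictionary id, and the method query parameter. A small sketch using Poco::URI (host, port, and the id are placeholder values):

#include <Poco/URI.h>
#include <iostream>

int main()
{
    Poco::URI uri;
    uri.setScheme("http");
    uri.setHost("127.0.0.1");                      /// DEFAULT_HOST
    uri.setPort(9012);                             /// DEFAULT_PORT
    uri.setPath("/extdict_request");               /// MAIN_HANDLER
    uri.addQueryParameter("dictionary_id", "42");  /// placeholder id
    uri.addQueryParameter("method", "extDict_loadAll");
    std::cout << uri.toString() << '\n';
    /// http://127.0.0.1:9012/extdict_request?dictionary_id=42&method=extDict_loadAll
}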

View File

@ -18,22 +18,6 @@ namespace ErrorCodes
}
Poco::URI IBridgeHelper::getMainURI() const
{
auto uri = createBaseURI();
uri.setPath(MAIN_HANDLER);
return uri;
}
Poco::URI IBridgeHelper::getPingURI() const
{
auto uri = createBaseURI();
uri.setPath(PING_HANDLER);
return uri;
}
void IBridgeHelper::startBridgeSync()
{
if (!bridgeHandShake())

View File

@ -19,8 +19,6 @@ class IBridgeHelper: protected WithContext
public:
static constexpr inline auto DEFAULT_HOST = "127.0.0.1";
static constexpr inline auto PING_HANDLER = "/ping";
static constexpr inline auto MAIN_HANDLER = "/";
static constexpr inline auto DEFAULT_FORMAT = "RowBinary";
static constexpr inline auto PING_OK_ANSWER = "Ok.";
@ -31,9 +29,9 @@ public:
virtual ~IBridgeHelper() = default;
Poco::URI getMainURI() const;
virtual Poco::URI getMainURI() const = 0;
Poco::URI getPingURI() const;
virtual Poco::URI getPingURI() const = 0;
void startBridgeSync();
@ -41,7 +39,6 @@ protected:
/// Check that the bridge is running. Can also check something else in the meantime.
virtual bool bridgeHandShake() = 0;
/// clickhouse-odbc-bridge, clickhouse-library-bridge
virtual String serviceAlias() const = 0;
virtual String serviceFileName() const = 0;

View File

@ -53,6 +53,8 @@ class XDBCBridgeHelper : public IXDBCBridgeHelper
public:
static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT;
static constexpr inline auto PING_HANDLER = "/ping";
static constexpr inline auto MAIN_HANDLER = "/";
static constexpr inline auto COL_INFO_HANDLER = "/columns_info";
static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote";
static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed";
@ -72,6 +74,22 @@ public:
}
protected:
Poco::URI getPingURI() const override
{
auto uri = createBaseURI();
uri.setPath(PING_HANDLER);
return uri;
}
Poco::URI getMainURI() const override
{
auto uri = createBaseURI();
uri.setPath(MAIN_HANDLER);
return uri;
}
bool bridgeHandShake() override
{
try

View File

@ -144,8 +144,8 @@ endif ()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp)
list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h)
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/FunctionsLogical.cpp Functions/indexHint.cpp)
list (APPEND dbms_headers Functions/IFunction.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/FunctionsLogical.h Functions/indexHint.h)
list (APPEND dbms_sources
AggregateFunctions/IAggregateFunction.cpp
@ -488,7 +488,7 @@ if (TARGET ch_contrib::aws_s3)
endif()
if (TARGET ch_contrib::azure_sdk)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::azure_sdk)
dbms_target_link_libraries (PRIVATE ch_contrib::azure_sdk)
endif()
if (TARGET ch_contrib::s2)
@ -611,4 +611,3 @@ if (ENABLE_TESTS)
add_check(unit_tests_dbms)
endif ()

View File

@ -263,11 +263,6 @@ public:
}
}
SerializationInfoPtr getSerializationInfo() const override
{
return data->getSerializationInfo();
}
bool isNullable() const override { return isColumnNullable(*data); }
bool onlyNull() const override { return data->isNullAt(0); }
bool isNumeric() const override { return data->isNumeric(); }

View File

@ -561,15 +561,4 @@ void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, siz
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
SerializationInfoPtr ColumnTuple::getSerializationInfo() const
{
MutableSerializationInfos infos;
infos.reserve(columns.size());
for (const auto & column : columns)
infos.push_back(const_pointer_cast<SerializationInfo>(column->getSerializationInfo()));
return std::make_shared<SerializationInfoTuple>(std::move(infos), SerializationInfo::Settings{});
}
}

View File

@ -102,7 +102,6 @@ public:
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
SerializationInfoPtr getSerializationInfo() const override;
size_t tupleSize() const { return columns.size(); }

View File

@ -27,17 +27,17 @@ namespace ErrorCodes
inline UInt64 bytes64MaskToBits64Mask(const UInt8 * bytes64)
{
#if defined(__AVX512F__) && defined(__AVX512BW__)
static const __m512i zero64 = _mm512_setzero_epi32();
UInt64 res = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes64)), zero64, _MM_CMPINT_EQ);
const __m512i vbytes = _mm512_loadu_si512(reinterpret_cast<const void *>(bytes64));
UInt64 res = _mm512_testn_epi8_mask(vbytes, vbytes);
#elif defined(__AVX__) && defined(__AVX2__)
static const __m256i zero32 = _mm256_setzero_si256();
const __m256i zero32 = _mm256_setzero_si256();
UInt64 res =
(static_cast<UInt64>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64)), zero32))) & 0xffffffff)
| (static_cast<UInt64>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64+32)), zero32))) << 32);
#elif defined(__SSE2__)
static const __m128i zero16 = _mm_setzero_si128();
const __m128i zero16 = _mm_setzero_si128();
UInt64 res =
(static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16))) & 0xffff)

View File

@ -64,11 +64,6 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
return res;
}
SerializationInfoPtr IColumn::getSerializationInfo() const
{
return std::make_shared<SerializationInfo>(ISerialization::getKind(*this), SerializationInfo::Settings{});
}
bool isColumnNullable(const IColumn & column)
{
return checkColumn<ColumnNullable>(column);

View File

@ -35,9 +35,6 @@ class ColumnGathererStream;
class Field;
class WeakHash32;
class SerializationInfo;
using SerializationInfoPtr = std::shared_ptr<const SerializationInfo>;
/*
* Represents a set of equal ranges in previous column to perform sorting in current column.
* Used in sorting by tuples.
@ -445,8 +442,6 @@ public:
/// Used to create full column from sparse.
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
[[nodiscard]] virtual SerializationInfoPtr getSerializationInfo() const;
/// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type.
[[nodiscard]] virtual Ptr compress() const

View File

@ -13,8 +13,6 @@ namespace fs = std::filesystem;
namespace DB
{
constexpr decltype(ConfigReloader::reload_interval) ConfigReloader::reload_interval;
ConfigReloader::ConfigReloader(
const std::string & path_,
const std::string & include_from_path_,

Some files were not shown because too many files have changed in this diff.