diff --git a/.clang-tidy b/.clang-tidy index ddd0ee6d911..0400b500e5c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -33,6 +33,8 @@ Checks: '-*, performance-no-automatic-move, performance-trivially-destructible, performance-unnecessary-copy-initialization, + performance-noexcept-move-constructor, + performance-move-const-arg, readability-avoid-const-params-in-decls, readability-const-return-type, @@ -206,3 +208,5 @@ CheckOptions: value: CamelCase - key: modernize-loop-convert.UseCxx20ReverseRanges value: false + - key: performance-move-const-arg.CheckTriviallyCopyableMove + value: false diff --git a/README.md b/README.md index f433b457861..e6dc9f1e6fc 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,6 @@ ClickHouse® is an open-source column-oriented database management system that a * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. -* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. +* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any. diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 3e6f174c6dc..8a1ca6064cb 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -17,6 +17,7 @@ set (SRCS terminalColors.cpp errnoToString.cpp StringRef.cpp + safeExit.cpp ) if (ENABLE_REPLXX) diff --git a/base/base/StringRef.h b/base/base/StringRef.h index eefc87121fc..171861e9ba7 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -19,6 +19,12 @@ #if defined(__SSE4_2__) #include #include + #define CRC_INT _mm_crc32_u64 +#endif + +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + #include + #define CRC_INT __crc32cd #endif @@ -205,7 +211,7 @@ struct StringRefHash64 } }; -#if defined(__SSE4_2__) +#if defined(CRC_INT) /// Parts are taken from CityHash. @@ -281,13 +287,13 @@ struct CRC32Hash do { UInt64 word = unalignedLoad(pos); - res = _mm_crc32_u64(res, word); + res = CRC_INT(res, word); pos += 8; } while (pos + 8 < end); UInt64 word = unalignedLoad(end - 8); /// I'm not sure if this is normal. 
- res = _mm_crc32_u64(res, word); + res = CRC_INT(res, word); return res; } diff --git a/base/base/insertAtEnd.h b/base/base/insertAtEnd.h index c4fef664511..abb2aa7d563 100644 --- a/base/base/insertAtEnd.h +++ b/base/base/insertAtEnd.h @@ -26,3 +26,27 @@ void insertAtEnd(std::vector & dest, std::vector && src) dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end())); src.clear(); } + +template +void insertAtEnd(Container & dest, const Container & src) +{ + if (src.empty()) + return; + + dest.insert(dest.end(), src.begin(), src.end()); +} + +template +void insertAtEnd(Container & dest, Container && src) +{ + if (src.empty()) + return; + if (dest.empty()) + { + dest.swap(src); + return; + } + + dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end())); + src.clear(); +} diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp new file mode 100644 index 00000000000..4ccfee80643 --- /dev/null +++ b/base/base/safeExit.cpp @@ -0,0 +1,18 @@ +#if defined(OS_LINUX) +# include +#endif +#include +#include +#include + +[[noreturn]] void safeExit(int code) +{ +#if defined(THREAD_SANITIZER) && defined(OS_LINUX) + /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, + /// while connection handling threads are still run. + (void)syscall(SYS_exit_group, code); + __builtin_unreachable(); +#else + _exit(code); +#endif +} diff --git a/base/base/safeExit.h b/base/base/safeExit.h new file mode 100644 index 00000000000..f999ccfac18 --- /dev/null +++ b/base/base/safeExit.h @@ -0,0 +1,4 @@ +#pragma once + +/// _exit() with a workaround for TSan. +[[noreturn]] void safeExit(int code); diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 69e05cf804b..f6206d0257c 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -11,10 +11,6 @@ DATASET="${TABLE}_v1.tar.xz" QUERIES_FILE="queries.sql" TRIES=3 -AMD64_BIN_URL="https://builds.clickhouse.com/master/amd64/clickhouse" -AARCH64_BIN_URL="https://builds.clickhouse.com/master/aarch64/clickhouse" -POWERPC64_BIN_URL="https://builds.clickhouse.com/master/ppc64le/clickhouse" - # Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. FASTER_DOWNLOAD=wget @@ -33,20 +29,60 @@ fi mkdir -p clickhouse-benchmark-$SCALE pushd clickhouse-benchmark-$SCALE -if [[ ! 
-f clickhouse ]]; then - CPU=$(uname -m) - if [[ ($CPU == x86_64) || ($CPU == amd64) ]]; then - $FASTER_DOWNLOAD "$AMD64_BIN_URL" - elif [[ $CPU == aarch64 ]]; then - $FASTER_DOWNLOAD "$AARCH64_BIN_URL" - elif [[ $CPU == powerpc64le ]]; then - $FASTER_DOWNLOAD "$POWERPC64_BIN_URL" - else - echo "Unsupported CPU type: $CPU" - exit 1 +OS=$(uname -s) +ARCH=$(uname -m) + +DIR= + +if [ "${OS}" = "Linux" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="amd64" + elif [ "${ARCH}" = "aarch64" ] + then + DIR="aarch64" + elif [ "${ARCH}" = "powerpc64le" ] + then + DIR="powerpc64le" + fi +elif [ "${OS}" = "FreeBSD" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="freebsd" + elif [ "${ARCH}" = "aarch64" ] + then + DIR="freebsd-aarch64" + elif [ "${ARCH}" = "powerpc64le" ] + then + DIR="freebsd-powerpc64le" + fi +elif [ "${OS}" = "Darwin" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="macos" + elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] + then + DIR="macos-aarch64" fi fi +if [ -z "${DIR}" ] +then + echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported." + exit 1 +fi + +URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" +echo +echo "Will download ${URL}" +echo +curl -O "${URL}" && chmod a+x clickhouse || exit 1 +echo +echo "Successfully downloaded the ClickHouse binary" + chmod a+x clickhouse if [[ ! -f $QUERIES_FILE ]]; then @@ -88,7 +124,12 @@ echo cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + if [ "${OS}" = "Darwin" ] + then + sudo purge > /dev/null + else + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + fi echo -n "[" for i in $(seq 1 $TRIES); do @@ -104,27 +145,45 @@ echo echo "Benchmark complete. System info:" echo -echo '----Version, build id-----------' -./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" -./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw -./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" -echo '----CPU-------------------------' -cat /proc/cpuinfo | grep -i -F 'model name' | uniq -lscpu -echo '----Block Devices---------------' -lsblk -echo '----Disk Free and Total--------' -df -h . -echo '----Memory Free and Total-------' -free -h -echo '----Physical Memory Amount------' -cat /proc/meminfo | grep MemTotal -echo '----RAID Info-------------------' -cat /proc/mdstat -#echo '----PCI-------------------------' -#lspci -#echo '----All Hardware Info-----------' -#lshw -echo '--------------------------------' - +if [ "${OS}" = "Darwin" ] +then + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}', version())" + sw_vers | grep BuildVersion + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + sysctl hw.model + sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' + echo '----Disk Free and Total--------' + df -h . 
+ echo '----Memory Free and Total-------' + vm_stat + echo '----Physical Memory Amount------' + ls -l /var/vm + echo '--------------------------------' +else + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + cat /proc/cpuinfo | grep -i -F 'model name' | uniq + lscpu + echo '----Block Devices---------------' + lsblk + echo '----Disk Free and Total--------' + df -h . + echo '----Memory Free and Total-------' + free -h + echo '----Physical Memory Amount------' + cat /proc/meminfo | grep MemTotal + echo '----RAID Info-------------------' + cat /proc/mdstat + #echo '----PCI-------------------------' + #lspci + #echo '----All Hardware Info-----------' + #lshw + echo '--------------------------------' +fi echo diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index b85260e6c76..96c6b75bc43 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -55,5 +55,5 @@ endif () if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. - Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") + Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") endif () diff --git a/contrib/jemalloc b/contrib/jemalloc index ca709c3139f..78b58379c85 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit ca709c3139f77f4c00a903cdee46d71e9028f6c6 +Subproject commit 78b58379c854a639df79beb3289351129d863d4b diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5b10d1fc490..5b7990ab030 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" +ARG repository="deb https://packages.clickhouse.com/deb stable main" ARG version=22.1.1.* # set non-empty deb_location_url url to create a docker image @@ -58,7 +58,7 @@ RUN groupadd -r clickhouse --gid=101 \ wget \ tzdata \ && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && if [ -n "$deb_location_url" ]; then \ echo "installing from custom url with deb packages: $deb_location_url" \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 24168cea330..bd1e0292636 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -263,9 +263,20 @@ function run_tests if [[ $NPROC == 0 ]]; then NPROC=1 fi - time clickhouse-test --hung-check -j "${NPROC}" --order=random \ - --fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \ - -- "$FASTTEST_FOCUS" 2>&1 \ + + local test_opts=( + --hung-check + --fast-tests-only + --no-long + --testname + --shard + --zookeeper + --check-zookeeper-session + --order random + --print-time + 
--jobs "${NPROC}" + ) + time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee "$FASTTEST_OUTPUT/test_result.txt" set -e diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index fa6e4bf6313..391f9a5e22f 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -42,6 +42,9 @@ COPY prepare_hive_data.sh / COPY demo_data.txt / ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH - +RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format +RUN apt install -y python3 python3-pip +RUN pip3 install flask requests +COPY http_api_server.py / COPY start.sh / diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py new file mode 100644 index 00000000000..4818b785c89 --- /dev/null +++ b/docker/test/integration/hive_server/http_api_server.py @@ -0,0 +1,70 @@ +import os +import subprocess +import datetime +from flask import Flask, flash, request, redirect, url_for + +def run_command(command, wait=False): + print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) + lines = [] + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) + if wait: + for l in iter(p.stdout.readline, b''): + lines.append(l) + p.poll() + return (lines, p.returncode) + else: + return(iter(p.stdout.readline, b''), 0) + + +UPLOAD_FOLDER = './' +ALLOWED_EXTENSIONS = {'txt', 'sh'} +app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + +@app.route('/') +def hello_world(): + return 'Hello World' + + +def allowed_file(filename): + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route('/upload', methods=['GET', 'POST']) +def upload_file(): + if request.method == 'POST': + # check if the post request has the file part + if 'file' not in request.files: + flash('No file part') + return redirect(request.url) + file = request.files['file'] + # If the user does not select a file, the browser submits an + # empty file without a filename. + if file.filename == '': + flash('No selected file') + return redirect(request.url) + if file and allowed_file(file.filename): + filename = file.filename + file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) + return redirect(url_for('upload_file', name=filename)) + return ''' + + Upload new File +

+    <h1>Upload new File</h1>
+    <form method=post enctype=multipart/form-data>
+      <input type=file name=file>
+      <input type=submit value=Upload>
+    </form>
+ ''' +@app.route('/run', methods=['GET', 'POST']) +def parse_request(): + data = request.data # data is empty + run_command(data, wait=True) + return 'Ok' + +if __name__ == '__main__': + app.run(port=5011) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index afecbb91c5d..8126b975612 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -2,5 +2,9 @@ hive -e "create database test" hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " +hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " - hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" + +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" diff --git a/docker/test/integration/hive_server/start.sh b/docker/test/integration/hive_server/start.sh index e01f28542af..4224b8126e6 100755 --- a/docker/test/integration/hive_server/start.sh +++ b/docker/test/integration/hive_server/start.sh @@ -1,6 +1,5 @@ service ssh start sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml -hadoop namenode -format start-all.sh service mysql start mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\"" @@ -9,4 +8,4 @@ schematool -initSchema -dbType mysql #nohup hiveserver2 & nohup hive --service metastore & bash /prepare_hive_data.sh -while true; do sleep 1000; done +python3 http_api_server.py diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh index 21106e9fc47..9dceef4c245 100644 --- a/docs/_includes/install/deb.sh +++ b/docs/_includes/install/deb.sh @@ -1,7 +1,7 @@ sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 -echo 
"deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ +echo "deb https://packages.clickhouse.com/deb stable main/" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update diff --git a/docs/_includes/install/deb_repo.sh b/docs/_includes/install/deb_repo.sh new file mode 100644 index 00000000000..21106e9fc47 --- /dev/null +++ b/docs/_includes/install/deb_repo.sh @@ -0,0 +1,11 @@ +sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh index e3fd1232047..ff99018f872 100644 --- a/docs/_includes/install/rpm.sh +++ b/docs/_includes/install/rpm.sh @@ -1,7 +1,6 @@ -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo +sudo yum install -y clickhouse-server clickhouse-client sudo /etc/init.d/clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm_repo.sh b/docs/_includes/install/rpm_repo.sh new file mode 100644 index 00000000000..e3fd1232047 --- /dev/null +++ b/docs/_includes/install/rpm_repo.sh @@ -0,0 +1,7 @@ +sudo yum install yum-utils +sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo +sudo yum install clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. 
diff --git a/docs/_includes/install/tgz.sh b/docs/_includes/install/tgz.sh index 0994510755b..4ba5890b32b 100644 --- a/docs/_includes/install/tgz.sh +++ b/docs/_includes/install/tgz.sh @@ -1,19 +1,20 @@ -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ +LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz +export LATEST_VERSION +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" diff --git a/docs/_includes/install/tgz_repo.sh b/docs/_includes/install/tgz_repo.sh new file mode 100644 index 00000000000..0994510755b --- /dev/null +++ b/docs/_includes/install/tgz_repo.sh @@ -0,0 +1,19 @@ +export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz + +tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz +sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz +sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-server-$LATEST_VERSION.tgz +sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +sudo /etc/init.d/clickhouse-server start + +tar -xzvf clickhouse-client-$LATEST_VERSION.tgz +sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md index fa57d2289b3..0fe8a46873c 100644 --- 
a/docs/en/development/browse-code.md +++ b/docs/en/development/browse-code.md @@ -5,7 +5,7 @@ toc_title: Source Code Browser # Browse ClickHouse Source Code {#browse-clickhouse-source-code} -You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. diff --git a/docs/en/development/build.md b/docs/en/development/build.md index aaa3bdfd043..5379fc37937 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -156,14 +156,6 @@ $ cd ClickHouse $ ./release ``` -## Faster builds for development - -Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. One common way to improve build time is to use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable this tweak, pass the following flags to `cmake`: - -``` --DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 -``` - ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour. @@ -172,9 +164,9 @@ They are built for stable, prestable and testing releases as long as for every c To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”. -## Split build configuration {#split-build} +## Faster builds for development: Split build configuration {#split-build} -Normally ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary is linked again, which is slow and may be inconvenient for development. There is an alternative configuration which creates dynamically loaded shared libraries instead, allowing faster incremental builds. To use it, add the following flags to your `cmake` invocation: +Normally, ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary needs to be linked, which is slow and may be inconvenient for development. There is an alternative configuration which instead creates dynamically loaded shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. 
To use it, add the following flags to your `cmake` invocation: ``` -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ``` diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index bcb026aa0dc..3dc14c87be7 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -36,6 +36,7 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo - `max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. - `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. - `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). +- `materialized_mysql_tables_list` — a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated. ```sql CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***') diff --git a/docs/en/faq/general/columnar-database.md b/docs/en/faq/general/columnar-database.md index cbc5f77d0ba..11bbd2e63f6 100644 --- a/docs/en/faq/general/columnar-database.md +++ b/docs/en/faq/general/columnar-database.md @@ -22,4 +22,4 @@ Here is the illustration of the difference between traditional row-oriented syst **Columnar** ![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#) -A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing because and data warehousing, they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. +A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing and data warehousing, because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index c03daf45b02..cd734d4dc8b 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -27,9 +27,17 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun {% include 'install/deb.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing deb-packages</summary>
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+</details>
+ You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. -You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/). +You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable). #### Packages {#packages} @@ -49,11 +57,17 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat First, you need to add the official repository: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing rpm-packages</summary>
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+</details>
+ If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. Then run these commands to install packages: @@ -62,36 +76,27 @@ Then run these commands to install packages: sudo yum install clickhouse-server clickhouse-client ``` -You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64). +You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable). ### From Tgz Archives {#from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. -The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/. +The required version can be downloaded with `curl` or `wget` from repository https://packages.clickhouse.com/tgz/. After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing tgz archives</summary>
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+</details>
+ For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. ### From Docker Image {#from-docker-image} @@ -215,6 +220,6 @@ SELECT 1 **Congratulations, the system works!** -To continue experimenting, you can download one of the test data sets or go through [tutorial](https://clickhouse.com/tutorial.html). +To continue experimenting, you can download one of the test data sets or go through [tutorial](./tutorial.md). [Original article](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index a220575cb3c..3c29ebc3270 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -38,6 +38,18 @@ Alternatively you can perform benchmark in the following steps. wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: wget https://builds.clickhouse.com/master/aarch64/clickhouse +# For powerpc64le: +wget https://builds.clickhouse.com/master/powerpc64le/clickhouse +# For freebsd: +wget https://builds.clickhouse.com/master/freebsd/clickhouse +# For freebsd-aarch64: +wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse +# For freebsd-powerpc64le: +wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse +# For macos: +wget https://builds.clickhouse.com/master/macos/clickhouse +# For macos-aarch64: +wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse ``` diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 45fdcc40451..cf0f93ecdd6 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -82,7 +82,7 @@ Columns: - `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with data part files. -- `disk` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part. +- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part. - `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files. diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f947c81c7a9..1e6c9cbd0b4 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,11 +1,11 @@ --- toc_priority: 59 -toc_title: Yandex.Metrica Dictionaries +toc_title: Embedded Dictionaries --- -# Functions for Working with Yandex.Metrica Dictionaries {#functions-for-working-with-yandex-metrica-dictionaries} +# Functions for Working with Embedded Dictionaries -In order for the functions below to work, the server config must specify the paths and addresses for getting all the Yandex.Metrica dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown. +In order for the functions below to work, the server config must specify the paths and addresses for getting all the embedded dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown. 
For information about creating reference lists, see the section “Dictionaries”. @@ -33,7 +33,7 @@ regionToCountry(RegionID, 'ua') – Uses the dictionary for the 'ua' key: /opt/g ### regionToCity(id\[, geobase\]) {#regiontocityid-geobase} -Accepts a UInt32 number – the region ID from the Yandex geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. +Accepts a UInt32 number – the region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. ### regionToArea(id\[, geobase\]) {#regiontoareaid-geobase} @@ -117,7 +117,7 @@ regionToTopContinent(id[, geobase]) **Arguments** -- `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). +- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. **Returned value** @@ -132,7 +132,7 @@ Type: `UInt32`. Gets the population for a region. The population can be recorded in files with the geobase. See the section “External dictionaries”. If the population is not recorded for the region, it returns 0. -In the Yandex geobase, the population might be recorded for child regions, but not for parent regions. +In the geobase, the population might be recorded for child regions, but not for parent regions. ### regionIn(lhs, rhs\[, geobase\]) {#regioninlhs-rhs-geobase} @@ -141,12 +141,12 @@ The relationship is reflexive – any region also belongs to itself. ### regionHierarchy(id\[, geobase\]) {#regionhierarchyid-geobase} -Accepts a UInt32 number – the region ID from the Yandex geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain. +Accepts a UInt32 number – the region ID from the geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain. Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`. ### regionToName(id\[, lang\]) {#regiontonameid-lang} -Accepts a UInt32 number – the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. +Accepts a UInt32 number – the region ID from the geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. `ua` and `uk` both mean Ukrainian. 
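The embedded-dictionary functions documented above only work once the server config points at the geobase files, so the quickest check is from the shell. A minimal sketch, assuming `clickhouse-client` can reach a server configured with `regions_hierarchy*.txt` / `regions_names_*.txt` (region ID 213 is the example the page itself uses):

```bash
# Resolve a region name in a specific language; per the page above the
# supported keys are ru, en, ua, uk, by, kz and tr.
clickhouse-client --query "SELECT regionToName(toUInt32(213), 'en')"

# Walk the parent chain, as in the regionHierarchy example above.
clickhouse-client --query "SELECT regionHierarchy(toUInt32(213))"

# 'ua' and 'uk' are both Ukrainian, so the two spellings should agree.
clickhouse-client --query "SELECT regionToName(toUInt32(213), 'ua') = regionToName(toUInt32(213), 'uk')"
```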
diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index e1ca58cdec8..68b459a46e8 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -11,7 +11,7 @@ ClickHouse supports the following syntax variants: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause or implicitly as a property of the table engine. Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. +During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. !!! note "Note" `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index b54fbacd2b0..e0afe256777 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -2968,7 +2968,7 @@ No changes compared to v20.4.3.16-stable. * Updated checking for hung queries in clickhouse-test script [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) * Removed some useless files from repository. [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Changed type of math perftests from `once` to `loop`. [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Add docker image which allows to build interactive code browser HTML report for our codebase. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html) +* Add docker image which allows to build interactive code browser HTML report for our codebase. 
[#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/ClickHouse/dbms/index.html) * Suppress some test failures under MSan. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) * Speedup "exception while insert" test. This test often time out in debug-with-coverage build. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Updated `libcxx` and `libcxxabi` to master. In preparation to [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/docs/ja/development/browse-code.md b/docs/ja/development/browse-code.md index 6539014eaf0..2f4cfb622c6 100644 --- a/docs/ja/development/browse-code.md +++ b/docs/ja/development/browse-code.md @@ -7,7 +7,7 @@ toc_title: "\u30BD\u30FC\u30B9\u30B3\u30FC\u30C9\u306E\u53C2\u7167" # ClickHouseのソースコードを参照 {#browse-clickhouse-source-code} -以下を使用できます **Woboq** オンラインのコードブラウザをご利用 [ここに](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). このコードナビゲーションや意味のハイライト表示、検索インデックス. コードのスナップショットは随時更新中です。 +以下を使用できます **Woboq** オンラインのコードブラウザをご利用 [ここに](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). このコードナビゲーションや意味のハイライト表示、検索インデックス. コードのスナップショットは随時更新中です。 また、ソースを参照することもできます [GitHub](https://github.com/ClickHouse/ClickHouse) いつものように diff --git a/docs/ja/engines/table-engines/mergetree-family/replication.md b/docs/ja/engines/table-engines/mergetree-family/replication.md index 195c2453467..3819b8be068 100644 --- a/docs/ja/engines/table-engines/mergetree-family/replication.md +++ b/docs/ja/engines/table-engines/mergetree-family/replication.md @@ -14,7 +14,7 @@ toc_title: "\u30C7\u30FC\u30BF\u8907\u88FD" - レプリケートリプレースマージツリー - 複製された集合マージツリー - レプリケートコラプシングマージツリー -- ReplicatedVersionedCollapsingMergetree +- ReplicatedVersionedCollapsingMergeTree - レプリケートグラフィティマージツリー 複製の作品のレベルを個別のテーブルではなく、全体のサーバーです。 サーバーでの店舗も複製、非複製のテーブルでも同時に行います。 diff --git a/docs/ja/getting-started/install.md b/docs/ja/getting-started/install.md index 7a2a822fe52..b348ae7a6ca 100644 --- a/docs/ja/getting-started/install.md +++ b/docs/ja/getting-started/install.md @@ -28,9 +28,17 @@ Debian や Ubuntu 用にコンパイル済みの公式パッケージ `deb` を {% include 'install/deb.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing deb-packages</summary>
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+</details>
+ 最新版を使いたい場合は、`stable`を`testing`に置き換えてください。(テスト環境ではこれを推奨します) -同様に、[こちら](https://repo.clickhouse.com/deb/stable/main/)からパッケージをダウンロードして、手動でインストールすることもできます。 +同様に、[こちら](https://packages.clickhouse.com/deb/pool/stable)からパッケージをダウンロードして、手動でインストールすることもできます。 #### パッケージ {#packages} @@ -46,11 +54,17 @@ CentOS、RedHat、その他すべてのrpmベースのLinuxディストリビュ まず、公式リポジトリを追加する必要があります: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing rpm-packages</summary>
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+</details>
+ 最新版を使いたい場合は `stable` を `testing` に置き換えてください。(テスト環境ではこれが推奨されています)。`prestable` もしばしば同様に利用できます。 そして、以下のコマンドを実行してパッケージをインストールします: @@ -59,35 +73,26 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -同様に、[こちら](https://repo.clickhouse.com/rpm/stable/x86_64) からパッケージをダウンロードして、手動でインストールすることもできます。 +同様に、[こちら](https://packages.clickhouse.com/rpm/stable) からパッケージをダウンロードして、手動でインストールすることもできます。 ### Tgzアーカイブから {#from-tgz-archives} すべての Linux ディストリビューションで、`deb` や `rpm` パッケージがインストールできない場合は、公式のコンパイル済み `tgz` アーカイブを使用することをお勧めします。 -必要なバージョンは、リポジトリ https://repo.clickhouse.com/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です: +必要なバージョンは、リポジトリ https://packages.clickhouse.com/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+<details markdown="1">
+<summary>Deprecated Method for installing tgz archives</summary>
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+</details>
+ 本番環境では、最新の `stable` バージョンを使うことをお勧めします。GitHub のページ https://github.com/ClickHouse/ClickHouse/tags で 接尾辞 `-stable` となっているバージョン番号として確認できます。 ### Dockerイメージから {#from-docker-image} @@ -186,6 +191,6 @@ SELECT 1 **おめでとうございます!システムが動きました!** -動作確認を続けるには、テストデータセットをダウンロードするか、[チュートリアル](https://clickhouse.com/tutorial.html)を参照してください。 +動作確認を続けるには、テストデータセットをダウンロードするか、[チュートリアル](./tutorial.md)を参照してください。 [元の記事](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/ja/operations/performance-test.md b/docs/ja/operations/performance-test.md index 068eb4fbc04..8c05acaf60b 100644 --- a/docs/ja/operations/performance-test.md +++ b/docs/ja/operations/performance-test.md @@ -20,9 +20,21 @@ toc_title: "\u30CF\u30FC\u30C9\u30A6\u30A7\u30A2\u8A66\u9A13" # For amd64: - wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse + wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: - wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578161264_binary/clickhouse + wget https://builds.clickhouse.com/master/aarch64/clickhouse + # For powerpc64le: + wget https://builds.clickhouse.com/master/powerpc64le/clickhouse + # For freebsd: + wget https://builds.clickhouse.com/master/freebsd/clickhouse + # For freebsd-aarch64: + wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse + # For freebsd-powerpc64le: + wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse + # For macos: + wget https://builds.clickhouse.com/master/macos/clickhouse + # For macos-aarch64: + wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse diff --git a/docs/redirects.txt b/docs/redirects.txt index d0d4d4d6c2c..949b9d48ca8 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -6,6 +6,7 @@ changelog/2017.md whats-new/changelog/2017.md changelog/2018.md whats-new/changelog/2018.md changelog/2019.md whats-new/changelog/2019.md changelog/index.md whats-new/changelog/index.md +commercial/cloud.md https://clickhouse.com/cloud/ data_types/array.md sql-reference/data-types/array.md data_types/boolean.md sql-reference/data-types/boolean.md data_types/date.md sql-reference/data-types/date.md diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md index 26b3f491599..730e97aed27 100644 --- a/docs/ru/development/browse-code.md +++ b/docs/ru/development/browse-code.md @@ -6,7 +6,7 @@ toc_title: "Навигация по коду ClickHouse" # Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} -Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/html_report///ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. +Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse). 
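The performance-test.md pages above now list one download command per platform. A small helper in the spirit of the `benchmark/hardware.sh` change earlier in this diff could pick the right directory automatically; this is only a sketch, with the directory names taken verbatim from those lists:

```bash
#!/usr/bin/env bash
# Pick the matching prebuilt master binary from builds.clickhouse.com.
set -e

OS=$(uname -s)
ARCH=$(uname -m)
DIR=

case "${OS}" in
    Linux)
        case "${ARCH}" in
            x86_64)      DIR=amd64 ;;
            aarch64)     DIR=aarch64 ;;
            powerpc64le) DIR=powerpc64le ;;
        esac ;;
    FreeBSD)
        case "${ARCH}" in
            x86_64)      DIR=freebsd ;;
            aarch64)     DIR=freebsd-aarch64 ;;
            powerpc64le) DIR=freebsd-powerpc64le ;;
        esac ;;
    Darwin)
        case "${ARCH}" in
            x86_64)         DIR=macos ;;
            aarch64|arm64)  DIR=macos-aarch64 ;;
        esac ;;
esac

if [ -z "${DIR}" ]; then
    echo "No prebuilt master binary for ${OS}/${ARCH}" >&2
    exit 1
fi

curl -O "https://builds.clickhouse.com/master/${DIR}/clickhouse"
chmod a+x clickhouse
```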
diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index a12773a75b0..8b35b8a836d 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -27,11 +27,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su {% include 'install/deb.sh' %} ``` -Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.com/deb/stable/main/. +
+<details markdown="1">
+<summary>Устаревший способ установки deb-пакетов</summary>
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+</details>
Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. -Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.com/deb/stable/main/). +Также вы можете вручную скачать и установить пакеты из [репозитория](https://packages.clickhouse.com/deb/pool/stable). #### Пакеты {#packages} @@ -51,11 +57,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Сначала нужно подключить официальный репозиторий: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+<details markdown="1">
+<summary>Устаревший способ установки rpm-пакетов</summary>
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+</details>
+ Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. Для, собственно, установки пакетов необходимо выполнить следующие команды: @@ -64,36 +76,27 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.com/rpm/stable/x86_64. +Также есть возможность установить пакеты вручную, скачав отсюда: https://packages.clickhouse.com/rpm/stable. ### Из Tgz архивов {#from-tgz-archives} Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов. -Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.com/tgz/. +Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. Пример установки самой свежей версии: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+<details markdown="1">
+<summary>Устаревший способ установки из архивов tgz</summary>
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+</details>
+ Для production окружений рекомендуется использовать последнюю `stable`-версию. Её номер также можно найти на github с на вкладке https://github.com/ClickHouse/ClickHouse/tags c постфиксом `-stable`. ### Из Docker образа {#from-docker-image} @@ -195,4 +198,4 @@ SELECT 1 **Поздравляем, система работает!** -Для дальнейших экспериментов можно попробовать загрузить один из тестовых наборов данных или пройти [пошаговое руководство для начинающих](https://clickhouse.com/tutorial.html). +Для дальнейших экспериментов можно попробовать загрузить один из тестовых наборов данных или пройти [пошаговое руководство для начинающих](./tutorial.md). diff --git a/docs/ru/sql-reference/statements/select/limit-by.md b/docs/ru/sql-reference/statements/select/limit-by.md index 861d88dcafb..5da001addf4 100644 --- a/docs/ru/sql-reference/statements/select/limit-by.md +++ b/docs/ru/sql-reference/statements/select/limit-by.md @@ -11,7 +11,7 @@ ClickHouse поддерживает следующий синтаксис: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -Во время обработки запроса, ClickHouse выбирает данные, упорядоченные по ключу сортировки. Ключ сортировки задаётся явно в секции [ORDER BY](order-by.md#select-order-by) или неявно в свойствах движка таблицы. Затем ClickHouse применяет `LIMIT n BY expressions` и возвращает первые `n` для каждой отличной комбинации `expressions`. Если указан `OFFSET`, то для каждого блока данных, который принадлежит отдельной комбинации `expressions`, ClickHouse отступает `offset_value` строк от начала блока и возвращает не более `n`. Если `offset_value` больше, чем количество строк в блоке данных, ClickHouse не возвращает ни одной строки. +Во время обработки запроса, ClickHouse выбирает данные, упорядоченные по ключу сортировки. Ключ сортировки задаётся явно в секции [ORDER BY](order-by.md#select-order-by) или неявно в свойствах движка таблицы (порядок строк гарантирован только при использовании [ORDER BY](order-by.md#select-order-by), в ином случае блоки строк не будут упорядочены из-за многопоточной обработки). Затем ClickHouse применяет `LIMIT n BY expressions` и возвращает первые `n` для каждой отличной комбинации `expressions`. Если указан `OFFSET`, то для каждого блока данных, который принадлежит отдельной комбинации `expressions`, ClickHouse отступает `offset_value` строк от начала блока и возвращает не более `n`. Если `offset_value` больше, чем количество строк в блоке данных, ClickHouse не возвращает ни одной строки. `LIMIT BY` не связана с секцией `LIMIT`. Их можно использовать в одном запросе. 
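The clarification added to limit-by.md (in both the English and Russian pages) is easy to demonstrate: without an explicit `ORDER BY`, the blocks feeding `LIMIT n BY` arrive in a thread-dependent order, so the "first n rows per group" are not deterministic. A self-contained sketch using `clickhouse local` and made-up values:

```bash
clickhouse local --query "
    SELECT id, val
    FROM values('id Int32, val Int32',
                (1, 10), (1, 11), (1, 12), (2, 20), (2, 21))
    ORDER BY id, val   -- fix the row order first...
    LIMIT 2 BY id      -- ...then keep the first two rows per distinct id
"
# Expected output: (1,10), (1,11) for id = 1 and (2,20), (2,21) for id = 2.
```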
diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 20e3ec7aa6f..1f0a3bb4b74 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -31,7 +31,12 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path) from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') ) target_path = to_path.replace('/index.md', '/').replace('.md', '/') - to_url = f'/{base_prefix}/{lang}/{target_path}' + + if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': + to_url = f'/{base_prefix}/{lang}/{target_path}' + else: + to_url = target_path + to_url = to_url.strip() write_redirect_html(out_path, to_url) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index d36a676134e..306c72103fb 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -247,7 +247,7 @@ toc_title: "\u53D8\u66F4\u65E5\u5FD7" - 更新了clickhouse-test脚本中挂起查询的检查 [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) - 从存储库中删除了一些无用的文件。 [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更改类型的数学perftests从 `once` 到 `loop`. [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) -- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.com/codebrowser/html_report///ClickHouse/dbms/index.html) +- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.com/codebrowser/ClickHouse/dbms/index.html) - 抑制MSan下的一些测试失败。 [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - 加速 “exception while insert” 测试 此测试通常在具有复盖率的调试版本中超时。 [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更新 `libcxx` 和 `libcxxabi` 为了主人 在准备 [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md index 9cee0a37444..f0ad6fd0984 100644 --- a/docs/zh/development/browse-code.md +++ b/docs/zh/development/browse-code.md @@ -5,7 +5,7 @@ toc_title: "\u6D4F\u89C8\u6E90\u4EE3\u7801" # 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 +您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 
它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) diff --git a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md index 79ca8f0cd10..abb2af6332d 100644 --- a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md @@ -38,5 +38,46 @@ CREATE TABLE test ENGINE = EmbeddedRocksDB PRIMARY KEY key ``` +## 指标 + +还有一个`system.rocksdb` 表, 公开rocksdb的统计信息: + +```sql +SELECT + name, + value +FROM system.rocksdb + +┌─name──────────────────────┬─value─┐ +│ no.file.opens │ 1 │ +│ number.block.decompressed │ 1 │ +└───────────────────────────┴───────┘ +``` + +## 配置 + +你能修改任何[rocksdb options](https://github.com/facebook/rocksdb/wiki/Option-String-and-Option-Map) 配置,使用配置文件: + +```xml + + + 8 + + + 2 + + + + TABLE + + 8 + + + 2 + +
+``` [原始文章](https://clickhouse.com/docs/en/engines/table-engines/integrations/embedded-rocksdb/) diff --git a/docs/zh/engines/table-engines/mergetree-family/replication.md b/docs/zh/engines/table-engines/mergetree-family/replication.md index 2e6391c01dd..c3be3a382cb 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replication.md +++ b/docs/zh/engines/table-engines/mergetree-family/replication.md @@ -7,7 +7,7 @@ - ReplicatedReplacingMergeTree - ReplicatedAggregatingMergeTree - ReplicatedCollapsingMergeTree -- ReplicatedVersionedCollapsingMergetree +- ReplicatedVersionedCollapsingMergeTree - ReplicatedGraphiteMergeTree 副本是表级别的,不是整个服务器级的。所以,服务器里可以同时有复制表和非复制表。 diff --git a/docs/zh/faq/integration/file-export.md b/docs/zh/faq/integration/file-export.md deleted file mode 120000 index 19a5c67148b..00000000000 --- a/docs/zh/faq/integration/file-export.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/integration/file-export.md \ No newline at end of file diff --git a/docs/zh/faq/integration/file-export.md b/docs/zh/faq/integration/file-export.md new file mode 100644 index 00000000000..3582bfb1008 --- /dev/null +++ b/docs/zh/faq/integration/file-export.md @@ -0,0 +1,37 @@ +--- +title: 如何从 ClickHouse 导出数据到一个文件? +toc_hidden: true +toc_priority: 10 +--- + +# 如何从 ClickHouse 导出数据到一个文件? {#how-to-export-to-file} + +## 使用 INTO OUTFILE 语法 {#using-into-outfile-clause} + +加一个 [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) 语法到你的查询语句中. + +例如: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' +``` + +ClickHouse 默认使用[TabSeparated](../../interfaces/formats.md#tabseparated) 格式写入数据. 修改[数据格式](../../interfaces/formats.md), 请用 [FORMAT 语法](../../sql-reference/statements/select/format.md#format-clause). + +例如: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV +``` + +## 使用一个文件引擎表 {#using-a-file-engine-table} + +查看 [File](../../engines/table-engines/special/file.md) 表引擎. + +## 使用命令行重定向 {#using-command-line-redirection} + +``` bash +$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt +``` + +查看 [clickhouse-client](../../interfaces/cli.md). diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index eec3aabe2a1..e74a05a9913 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -27,9 +27,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not {% include 'install/deb.sh' %} ``` +
+<details markdown="1">
+
+<summary>Deprecated Method for installing deb-packages</summary>
+
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+
+</details>
+
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。 -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/deb/stable/main/)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/deb/pool/stable)。 安装包列表: @@ -45,11 +53,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not 首先,您需要添加官方存储库: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+<details markdown="1">
+
+<summary>Deprecated Method for installing rpm-packages</summary>
+
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+
+</details>
+
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。 然后运行命令安装: @@ -58,37 +72,28 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/rpm/stable/x86_64)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/rpm/stable)。 ### `Tgz`安装包 {#from-tgz-archives} 如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。 -所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.com/tgz/`下载。 +所需的版本可以通过`curl`或`wget`从存储库`https://packages.clickhouse.com/tgz/`下载。 下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新稳定版本的安装示例: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+<details markdown="1">
+
+<summary>Deprecated Method for installing tgz archives</summary>
+
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+
+</details>
+
+ 对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。 ### `Docker`安装包 {#from-docker-image} @@ -183,6 +188,6 @@ SELECT 1 **恭喜,系统已经工作了!** -为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.com/tutorial.html)。 +为了继续进行实验,你可以尝试下载测试数据集或查看[教程](./tutorial.md)。 [原始文章](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/zh/operations/performance-test.md b/docs/zh/operations/performance-test.md index d3643969c2e..9761d516ddd 100644 --- a/docs/zh/operations/performance-test.md +++ b/docs/zh/operations/performance-test.md @@ -36,6 +36,18 @@ chmod a+x ./hardware.sh wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: wget https://builds.clickhouse.com/master/aarch64/clickhouse +# For powerpc64le: +wget https://builds.clickhouse.com/master/powerpc64le/clickhouse +# For freebsd: +wget https://builds.clickhouse.com/master/freebsd/clickhouse +# For freebsd-aarch64: +wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse +# For freebsd-powerpc64le: +wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse +# For macos: +wget https://builds.clickhouse.com/master/macos/clickhouse +# For macos-aarch64: +wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse ``` diff --git a/docs/zh/sql-reference/statements/alter/role.md b/docs/zh/sql-reference/statements/alter/role.md deleted file mode 120000 index ce1f0a94eb3..00000000000 --- a/docs/zh/sql-reference/statements/alter/role.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/role.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/role.md b/docs/zh/sql-reference/statements/alter/role.md new file mode 100644 index 00000000000..3f5c5daf7b8 --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/role.md @@ -0,0 +1,16 @@ +--- +toc_priority: 46 +toc_title: 角色 +--- + +## 操作角色 {#alter-role-statement} + +修改角色. + +语法示例: + +``` sql +ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] + [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] +``` diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md deleted file mode 120000 index 09ad2d301f3..00000000000 --- a/docs/zh/sql-reference/statements/alter/row-policy.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/row-policy.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md new file mode 100644 index 00000000000..0cdba239b84 --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/row-policy.md @@ -0,0 +1,19 @@ +--- +toc_priority: 47 +toc_title: 行策略 +--- + +# 操作行策略 {#alter-row-policy-statement} + +修改行策略. + +语法: + +``` sql +ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...] + [AS {PERMISSIVE | RESTRICTIVE}] + [FOR SELECT] + [USING {condition | NONE}][,...] + [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] +``` diff --git a/docs/zh/sql-reference/statements/select/limit-by.md b/docs/zh/sql-reference/statements/select/limit-by.md index f5ed5b1bf98..9b93bb9cf21 100644 --- a/docs/zh/sql-reference/statements/select/limit-by.md +++ b/docs/zh/sql-reference/statements/select/limit-by.md @@ -11,7 +11,7 @@ ClickHouse支持以下语法变体: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -在查询处理过程中,ClickHouse会选择按排序键排序的数据。 排序键使用以下命令显式设置 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句或隐式作为表引擎的属性。 然后ClickHouse应用 `LIMIT n BY expressions` 并返回第一 `n` 每个不同组合的行 `expressions`. 如果 `OFFSET` 被指定,则对于每个数据块属于一个不同的组合 `expressions`,ClickHouse跳过 `offset_value` 从块开始的行数,并返回最大值 `n` 行的结果。 如果 `offset_value` 如果数据块中的行数大于数据块中的行数,ClickHouse将从该块返回零行。 +在进行查询处理时,ClickHouse选择按排序键排序的数据。排序键设置显式地使用一个[ORDER BY](order-by.md#select-order-by)条款或隐式属性表的引擎(行顺序只是保证在使用[ORDER BY](order-by.md#select-order-by),否则不会命令行块由于多线程)。然后ClickHouse应用`LIMIT n BY 表达式`,并为每个不同的`表达式`组合返回前n行。如果指定了`OFFSET`,那么对于每个属于不同`表达式`组合的数据块,ClickHouse将跳过`offset_value`从块开始的行数,并最终返回最多`n`行的结果。如果`offset_value`大于数据块中的行数,则ClickHouse从数据块中返回零行。 !!! note "注" `LIMIT BY` 是不相关的 [LIMIT](../../../sql-reference/statements/select/limit.md). 它们都可以在同一个查询中使用。 diff --git a/docs/zh/whats-new/changelog/2020.md b/docs/zh/whats-new/changelog/2020.md index 19e9125224c..6890f0f551e 100644 --- a/docs/zh/whats-new/changelog/2020.md +++ b/docs/zh/whats-new/changelog/2020.md @@ -2962,7 +2962,7 @@ * 更新了对 clickhouse-test 脚本中挂起查询的检查. [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) * 从存储库中删除了一些无用的文件. [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) * 将数学性能测试的类型从 `once` 更改为 `loop` . [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* 添加 docker 图像,它允许为我们的代码库构建交互式代码浏览器 HTML 报告. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html) +* 添加 docker 图像,它允许为我们的代码库构建交互式代码浏览器 HTML 报告. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/ClickHouse/dbms/index.html) * 抑制 MSan 下的一些测试失败. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) * 加速 `exception while insert` 测试. 此测试经常在 debug-with-coverage 构建中超时. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) * 将 `libcxx` 和 `libcxxabi` 更新为 master. 
准备 [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/packages/clickhouse-rpm.repo b/packages/clickhouse-rpm.repo new file mode 100644 index 00000000000..27321123dc1 --- /dev/null +++ b/packages/clickhouse-rpm.repo @@ -0,0 +1,31 @@ +[clickhouse-stable] +name=ClickHouse - Stable Repository +baseurl=https://packages.clickhouse.com/rpm/stable/ +gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-lts] +name=ClickHouse - LTS Repository +baseurl=https://packages.clickhouse.com/rpm/lts/ +gpgkey=https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-prestable] +name=ClickHouse - Pre-stable Repository +baseurl=https://packages.clickhouse.com/rpm/prestable/ +gpgkey=https://packages.clickhouse.com/rpm/prestable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-testing] +name=ClickHouse - Testing Repository +baseurl=https://packages.clickhouse.com/rpm/testing/ +gpgkey=https://packages.clickhouse.com/rpm/testing/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=1 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index c40f41cd8d1..a3f5c0ab1c7 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -371,6 +371,13 @@ void Client::initialize(Poco::Util::Application & self) configReadClient(config(), home_path); + const char * env_user = getenv("CLICKHOUSE_USER"); + const char * env_password = getenv("CLICKHOUSE_PASSWORD"); + if (env_user) + config().setString("user", env_user); + if (env_password) + config().setString("password", env_password); + // global_context->setApplicationType(Context::ApplicationType::CLIENT); global_context->setQueryParameters(query_parameters); @@ -1119,7 +1126,12 @@ void Client::processOptions(const OptionsDescription & options_description, { const auto & name = setting.getName(); if (options.count(name)) - config().setString(name, options[name].as()); + { + if (allow_repeated_settings) + config().setString(name, options[name].as().back()); + else + config().setString(name, options[name].as()); + } } if (options.count("config-file") && options.count("config")) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 88df4d5b3e7..fd317f88912 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -34,11 +35,6 @@ #include #include -#if defined(OS_LINUX) -# include -# include -#endif - int mainEntryClickHouseKeeper(int argc, char ** argv) { @@ -127,18 +123,6 @@ Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port return socket_address; } -[[noreturn]] void forceShutdown() -{ -#if defined(THREAD_SANITIZER) && defined(OS_LINUX) - /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, - /// while connection handling threads are still run. - (void)syscall(SYS_exit_group, 0); - __builtin_unreachable(); -#else - _exit(0); -#endif -} - std::string getUserName(uid_t user_id) { /// Try to convert user id into user name. 
@@ -474,7 +458,7 @@ int Keeper::main(const std::vector & /*args*/) if (current_connections) { LOG_INFO(log, "Will shutdown forcefully."); - forceShutdown(); + safeExit(0); } }); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index eb3a03d0564..eb562dfd9eb 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -411,7 +411,8 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); - connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress); + connection = LocalConnection::createConnection( + connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); } diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 947e7ab1768..1ffb0b437a6 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -909,7 +909,7 @@ public: ColumnPtr new_nested_column = nested_model->generate(nested_column); - return ColumnArray::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(std::move(column_array.getOffsetsPtr()))); + return ColumnArray::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(column_array.getOffsetsPtr())); } void updateSeed() override @@ -947,7 +947,7 @@ public: ColumnPtr new_nested_column = nested_model->generate(nested_column); - return ColumnNullable::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(std::move(column_nullable.getNullMapColumnPtr()))); + return ColumnNullable::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(column_nullable.getNullMapColumnPtr())); } void updateSeed() override diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 79837310ec4..b856131d821 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -31,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -95,8 +96,6 @@ # include # include # include -# include -# include #endif #if USE_SSL @@ -505,19 +504,6 @@ void checkForUsersNotInMainConfig( } } -[[noreturn]] void forceShutdown() -{ -#if defined(THREAD_SANITIZER) && defined(OS_LINUX) - /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, - /// while connection handling threads are still run. - (void)syscall(SYS_exit_group, 0); - __builtin_unreachable(); -#else - _exit(0); -#endif -} - - int Server::main(const std::vector & /*args*/) { Poco::Logger * log = &logger(); @@ -1317,7 +1303,7 @@ if (ThreadFuzzer::instance().isEffective()) #endif #if !defined(__x86_64__) - LOG_INFO(log, "Query Profiler is only tested on x86_64. It also known to not work under qemu-user."); + LOG_INFO(log, "Query Profiler and TraceCollector is only tested on x86_64. It also known to not work under qemu-user."); #endif if (!hasPHDRCache()) @@ -1527,7 +1513,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Dump coverage here, because std::atexit callback would not be called. 
dumpCoverageReportIfPossible(); LOG_INFO(log, "Will shutdown forcefully."); - forceShutdown(); + safeExit(0); } }); diff --git a/programs/server/play.html b/programs/server/play.html index f7154643504..f2515109161 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -87,13 +87,6 @@ color: var(--text-color); } - /* Otherwise scrollbar may appear dynamically and it will alter viewport height, - then relative heights of elements will change suddenly, and it will break overall impression. */ - /* html - { - overflow-x: scroll; - }*/ - div { width: 100%; @@ -382,7 +375,7 @@ diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index e944dc71114..acf2a972b13 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -91,7 +91,7 @@ String serializeAccessEntity(const IAccessEntity & entity) return buf.str(); } -AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path) +AccessEntityPtr deserializeAccessEntityImpl(const String & definition) { ASTs queries; ParserAttachAccessEntity parser; @@ -118,43 +118,42 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String if (auto * create_user_query = query->as()) { if (res) - throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = user = std::make_unique(); InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query); } else if (auto * create_role_query = query->as()) { if (res) - throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = role = std::make_unique(); InterpreterCreateRoleQuery::updateRoleFromQuery(*role, *create_role_query); } else if (auto * create_policy_query = query->as()) { if (res) - throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = policy = std::make_unique(); InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(*policy, *create_policy_query); } else if (auto * create_quota_query = query->as()) { if (res) - throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = quota = std::make_unique(); InterpreterCreateQuotaQuery::updateQuotaFromQuery(*quota, *create_quota_query); } else if (auto * create_profile_query = query->as()) { if (res) - throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("Two access entities attached in the same file", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); res = profile = std::make_unique(); InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(*profile, *create_profile_query); } else if (auto * grant_query = query->as()) { if (!user && !role) - throw Exception( - "A user or role should be attached before grant in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("A user or role should be attached before grant", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); if (user) 
InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query); else @@ -165,9 +164,27 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String } if (!res) - throw Exception("No access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); + throw Exception("No access entities attached", ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION); return res; } + +AccessEntityPtr deserializeAccessEntity(const String & definition, const String & file_path) +{ + if (file_path.empty()) + return deserializeAccessEntityImpl(definition); + + try + { + return deserializeAccessEntityImpl(definition); + } + catch (Exception & e) + { + e.addMessage("Could not parse " + file_path); + e.rethrow(); + __builtin_unreachable(); + } +} + } diff --git a/src/Access/AccessEntityIO.h b/src/Access/AccessEntityIO.h index aa0a3e7cf63..457b490affb 100644 --- a/src/Access/AccessEntityIO.h +++ b/src/Access/AccessEntityIO.h @@ -10,6 +10,6 @@ using AccessEntityPtr = std::shared_ptr; String serializeAccessEntity(const IAccessEntity & entity); -AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path); +AccessEntityPtr deserializeAccessEntity(const String & definition, const String & file_path = ""); } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index ca8609f3984..130eb9f80d7 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -706,8 +706,8 @@ private: AccessRights::AccessRights() = default; AccessRights::~AccessRights() = default; -AccessRights::AccessRights(AccessRights && src) = default; -AccessRights & AccessRights::operator =(AccessRights && src) = default; +AccessRights::AccessRights(AccessRights && src) noexcept = default; +AccessRights & AccessRights::operator =(AccessRights && src) noexcept = default; AccessRights::AccessRights(const AccessRights & src) diff --git a/src/Access/AccessRights.h b/src/Access/AccessRights.h index c3f75b8c303..e71b409b7b6 100644 --- a/src/Access/AccessRights.h +++ b/src/Access/AccessRights.h @@ -19,8 +19,8 @@ public: ~AccessRights(); AccessRights(const AccessRights & src); AccessRights & operator =(const AccessRights & src); - AccessRights(AccessRights && src); - AccessRights & operator =(AccessRights && src); + AccessRights(AccessRights && src) noexcept; + AccessRights & operator =(AccessRights && src) noexcept; bool isEmpty() const; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 5215139b50c..7393fcd8d36 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -48,7 +48,7 @@ namespace } catch (...) 
{ - tryLogCurrentException(&log, "Could not parse " + file_path); + tryLogCurrentException(&log); return nullptr; } } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index cac5dacef9b..b7040e219db 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -208,7 +208,7 @@ void LDAPAccessStorage::assignRolesNoLock(User & user, const LDAPClient::SearchR { const auto & user_name = user.getName(); auto & granted_roles = user.granted_roles; - const auto local_role_names = mapExternalRolesNoLock(external_roles); + auto local_role_names = mapExternalRolesNoLock(external_roles); auto grant_role = [this, &user_name, &granted_roles] (const String & role_name, const bool common) { diff --git a/src/Access/RolesOrUsersSet.cpp b/src/Access/RolesOrUsersSet.cpp index 2c302fde229..3e0d56c77a0 100644 --- a/src/Access/RolesOrUsersSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -22,8 +22,8 @@ namespace ErrorCodes RolesOrUsersSet::RolesOrUsersSet() = default; RolesOrUsersSet::RolesOrUsersSet(const RolesOrUsersSet & src) = default; RolesOrUsersSet & RolesOrUsersSet::operator =(const RolesOrUsersSet & src) = default; -RolesOrUsersSet::RolesOrUsersSet(RolesOrUsersSet && src) = default; -RolesOrUsersSet & RolesOrUsersSet::operator =(RolesOrUsersSet && src) = default; +RolesOrUsersSet::RolesOrUsersSet(RolesOrUsersSet && src) noexcept = default; +RolesOrUsersSet & RolesOrUsersSet::operator =(RolesOrUsersSet && src) noexcept = default; RolesOrUsersSet::RolesOrUsersSet(AllTag) diff --git a/src/Access/RolesOrUsersSet.h b/src/Access/RolesOrUsersSet.h index 1d5842e31a6..02313c2c424 100644 --- a/src/Access/RolesOrUsersSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -22,8 +22,8 @@ struct RolesOrUsersSet RolesOrUsersSet(); RolesOrUsersSet(const RolesOrUsersSet & src); RolesOrUsersSet & operator =(const RolesOrUsersSet & src); - RolesOrUsersSet(RolesOrUsersSet && src); - RolesOrUsersSet & operator =(RolesOrUsersSet && src); + RolesOrUsersSet(RolesOrUsersSet && src) noexcept; + RolesOrUsersSet & operator =(RolesOrUsersSet && src) noexcept; struct AllTag {}; RolesOrUsersSet(AllTag); diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 9d21ba91f5d..6084138f306 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -25,8 +25,8 @@ SettingsConstraints::SettingsConstraints(const AccessControl & access_control_) SettingsConstraints::SettingsConstraints(const SettingsConstraints & src) = default; SettingsConstraints & SettingsConstraints::operator=(const SettingsConstraints & src) = default; -SettingsConstraints::SettingsConstraints(SettingsConstraints && src) = default; -SettingsConstraints & SettingsConstraints::operator=(SettingsConstraints && src) = default; +SettingsConstraints::SettingsConstraints(SettingsConstraints && src) noexcept = default; +SettingsConstraints & SettingsConstraints::operator=(SettingsConstraints && src) noexcept = default; SettingsConstraints::~SettingsConstraints() = default; diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index ec0421e060d..7439bad5d1a 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -53,9 +53,9 @@ class SettingsConstraints public: SettingsConstraints(const AccessControl & access_control_); SettingsConstraints(const SettingsConstraints & src); - SettingsConstraints & operator =(const SettingsConstraints & src); - SettingsConstraints(SettingsConstraints && src); - SettingsConstraints & 
operator =(SettingsConstraints && src); + SettingsConstraints & operator=(const SettingsConstraints & src); + SettingsConstraints(SettingsConstraints && src) noexcept; + SettingsConstraints & operator=(SettingsConstraints && src) noexcept; ~SettingsConstraints(); void clear(); diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8615f6b610f..d8ed247d20e 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -147,7 +147,7 @@ void BackupImpl::readBackupMetadata() auto in = readFileImpl(".backup"); String str; readStringUntilEOF(str, *in); - std::istringstream stream(std::move(str)); // STYLE_CHECK_ALLOW_STD_STRING_STREAM + std::istringstream stream(str); // STYLE_CHECK_ALLOW_STD_STRING_STREAM Poco::AutoPtr config{new Poco::Util::XMLConfiguration()}; config->load(stream); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 2829f9e9f46..a69acb2ed82 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -7,12 +7,13 @@ #include #include -#include #include #include #include +#include #include #include +#include #include #include #include @@ -229,11 +230,11 @@ public: static bool cancelled() { return exit_on_signal.test(); } }; -/// This signal handler is set only for sigint. +/// This signal handler is set only for SIGINT. void interruptSignalHandler(int signum) { if (exit_on_signal.test_and_set()) - _exit(signum); + safeExit(128 + signum); } @@ -243,22 +244,22 @@ ClientBase::ClientBase() = default; void ClientBase::setupSignalHandler() { - exit_on_signal.test_and_set(); + exit_on_signal.test_and_set(); - struct sigaction new_act; - memset(&new_act, 0, sizeof(new_act)); + struct sigaction new_act; + memset(&new_act, 0, sizeof(new_act)); - new_act.sa_handler = interruptSignalHandler; - new_act.sa_flags = 0; + new_act.sa_handler = interruptSignalHandler; + new_act.sa_flags = 0; #if defined(OS_DARWIN) sigemptyset(&new_act.sa_mask); #else - if (sigemptyset(&new_act.sa_mask)) + if (sigemptyset(&new_act.sa_mask)) throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); #endif - if (sigaction(SIGINT, &new_act, nullptr)) + if (sigaction(SIGINT, &new_act, nullptr)) throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); } @@ -702,7 +703,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa /// Also checks if query execution should be cancelled. void ClientBase::receiveResult(ASTPtr parsed_query) { - bool cancelled = false; QueryInterruptHandler query_interrupt_handler; // TODO: get the poll_interval from commandline. @@ -773,7 +773,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) /// Receive a part of the result, or progress info or an exception and process it. /// Returns true if one should continue receiving packets. /// Output of result is suppressed if query was cancelled. 
-bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) +bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) { Packet packet = connection->receivePacket(); @@ -783,7 +783,7 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) return true; case Protocol::Server::Data: - if (!cancelled) + if (!cancelled_) onData(packet.block, parsed_query); return true; @@ -796,12 +796,12 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) return true; case Protocol::Server::Totals: - if (!cancelled) + if (!cancelled_) onTotals(packet.block, parsed_query); return true; case Protocol::Server::Extremes: - if (!cancelled) + if (!cancelled_) onExtremes(packet.block, parsed_query); return true; @@ -867,7 +867,7 @@ void ClientBase::onProfileEvents(Block & block) if (rows == 0) return; - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) + if (getName() == "local" || server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) { const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); const auto & names = typeid_cast(*block.getByName("name").column); @@ -1265,6 +1265,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { resetOutput(); have_error = false; + cancelled = false; client_exception.reset(); server_exception.reset(); @@ -1392,6 +1393,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::optional & current_exception) { + if (!is_interactive && cancelled) + return MultiQueryProcessingStage::QUERIES_END; + if (this_query_begin >= all_queries_end) return MultiQueryProcessingStage::QUERIES_END; @@ -1868,6 +1872,8 @@ void ClientBase::readArguments( prev_port_arg = port_arg; } } + else if (arg == "--allow_repeated_settings"sv) + allow_repeated_settings = true; else common_arguments.emplace_back(arg); } @@ -1880,7 +1886,10 @@ void ClientBase::readArguments( void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) { - cmd_settings.addProgramOptions(options_description.main_description.value()); + if (allow_repeated_settings) + cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value()); + else + cmd_settings.addProgramOptions(options_description.main_description.value()); /// Parse main commandline options. auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered(); po::parsed_options parsed = parser.run(); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 0a11745b996..e625d4a5c63 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -108,7 +108,7 @@ protected: private: void receiveResult(ASTPtr parsed_query); - bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled); + bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_); void receiveLogs(ASTPtr parsed_query); bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description, ASTPtr parsed_query); bool receiveEndOfQuery(); @@ -219,6 +219,7 @@ protected: ProgressIndication progress_indication; bool need_render_progress = true; + bool need_render_profile_events = true; bool written_first_block = false; size_t processed_rows = 0; /// How many rows have been read or written. 
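The `--allow_repeated_settings` flag added above switches the client to `addProgramOptionsAsMultitokens`, so a setting may appear several times on the command line and the last occurrence wins. A minimal stand-alone sketch of that pattern with `boost::program_options`; the option name and program are invented for illustration and this is not the ClickHouse helper itself:

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()
        // Declaring the value as a vector lets the option be repeated;
        // every occurrence is collected, and the caller takes .back(),
        // so the last value on the command line wins.
        ("max_threads", po::value<std::vector<std::string>>()->multitoken(), "example setting");

    po::variables_map options;
    po::store(po::command_line_parser(argc, argv).options(desc).run(), options);
    po::notify(options);

    if (options.count("max_threads"))
        std::cout << "effective value: "
                  << options["max_threads"].as<std::vector<std::string>>().back() << '\n';
    return 0;
}
```

Run as `./opts --max_threads 4 --max_threads 8`, the vector collects both occurrences and `.back()` yields `8`; had the option been declared as a plain `std::string`, the second occurrence would throw `multiple_occurrences`, which is the behaviour the new flag works around.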
@@ -259,6 +260,10 @@ protected: }; std::vector hosts_and_ports{}; + + bool allow_repeated_settings = false; + + bool cancelled = false; }; } diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 3385834e386..60cd24eddfb 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -165,7 +165,7 @@ std::variant ConnectionEstablisherAsync:: fiber = std::move(fiber).resume(); if (exception) - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); if (connection_establisher.isFinished()) { diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 791ac4c1ef1..9c3c20b4d02 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -363,7 +363,7 @@ bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLoc else if (std::holds_alternative(res)) { finishProcessReplica(replica_state, true); - std::rethrow_exception(std::move(std::get(res))); + std::rethrow_exception(std::get(res)); } return false; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 8ee4b9e1c1f..a57086810bf 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB @@ -18,10 +20,12 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_) +LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) + , send_profile_events(send_profile_events_) + , server_display_name(server_display_name_) { /// Authenticate and create a context to execute queries. 
session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -58,6 +62,11 @@ void LocalConnection::updateProgress(const Progress & value) state->progress.incrementPiecewiseAtomically(value); } +void LocalConnection::getProfileEvents(Block & block) +{ + ProfileEvents::getProfileEvents(server_display_name, state->profile_queue, block, last_sent_snapshots); +} + void LocalConnection::sendQuery( const ConnectionTimeouts &, const String & query, @@ -77,18 +86,23 @@ void LocalConnection::sendQuery( if (!current_database.empty()) query_context->setCurrentDatabase(current_database); - CurrentThread::QueryScope query_scope_holder(query_context); state.reset(); state.emplace(); state->query_id = query_id; state->query = query; + state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); + state->profile_queue = std::make_shared(std::numeric_limits::max()); + CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); if (send_progress) state->after_send_progress.restart(); + if (send_profile_events) + state->after_send_profile_events.restart(); + next_packet_type.reset(); try @@ -161,11 +175,11 @@ void LocalConnection::sendData(const Block & block, const String &, bool) if (state->pushing_async_executor) { - state->pushing_async_executor->push(std::move(block)); + state->pushing_async_executor->push(block); } else if (state->pushing_executor) { - state->pushing_executor->push(std::move(block)); + state->pushing_executor->push(block); } } @@ -231,6 +245,16 @@ bool LocalConnection::poll(size_t) return true; } + if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + Block block; + state->after_send_profile_events.restart(); + next_packet_type = Protocol::Server::ProfileEvents; + getProfileEvents(block); + state->block.emplace(std::move(block)); + return true; + } + try { pollImpl(); @@ -459,9 +483,14 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } -ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress) +ServerConnectionPtr LocalConnection::createConnection( + const ConnectionParameters &, + ContextPtr current_context, + bool send_progress, + bool send_profile_events, + const String & server_display_name) { - return std::make_unique(current_context, send_progress); + return std::make_unique(current_context, send_progress, send_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index b85022cf183..62e95cdfee6 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -29,6 +30,7 @@ struct LocalQueryState std::unique_ptr executor; std::unique_ptr pushing_executor; std::unique_ptr pushing_async_executor; + InternalProfileEventsQueuePtr profile_queue; std::optional exception; @@ -50,19 +52,28 @@ struct LocalQueryState Progress progress; /// Time after the last check to stop the request and send the progress. 
Stopwatch after_send_progress; + Stopwatch after_send_profile_events; + + std::unique_ptr query_scope_holder; }; class LocalConnection : public IServerConnection, WithContext { public: - explicit LocalConnection(ContextPtr context_, bool send_progress_ = false); + explicit LocalConnection( + ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = ""); ~LocalConnection() override; IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; } - static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false); + static ServerConnectionPtr createConnection( + const ConnectionParameters & connection_parameters, + ContextPtr current_context, + bool send_progress = false, + bool send_profile_events = false, + const String & server_display_name = ""); void setDefaultDatabase(const String & database) override; @@ -129,12 +140,16 @@ private: void updateProgress(const Progress & value); + void getProfileEvents(Block & block); + bool pollImpl(); ContextMutablePtr query_context; Session session; bool send_progress; + bool send_profile_events; + String server_display_name; String description = "clickhouse-local"; std::optional state; @@ -144,5 +159,7 @@ private: std::optional next_packet_type; String current_database; + + ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; }; } diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 738c98d2119..b711008e233 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -103,6 +103,7 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p { loading_thread = std::thread([context=Context::createCopy(context), connection_parameters, suggestion_limit, this] { + ThreadStatus thread_status; for (size_t retry = 0; retry < 10; ++retry) { try diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index fb69541c363..b33630b993c 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -36,8 +36,8 @@ public: static Ptr create(const ColumnPtr & column) { return ColumnMap::create(column->assumeMutable()); } static Ptr create(ColumnPtr && arg) { return create(arg); } - template ::value>::type> - static MutablePtr create(Arg && arg) { return Base::create(std::forward(arg)); } + template ::value>::type> + static MutablePtr create(Args &&... 
args) { return Base::create(std::forward(args)...); } std::string getName() const override; const char * getFamilyName() const override { return "Map"; } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 7a4ce8288a3..bc117b065b7 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -541,7 +541,7 @@ ColumnPtr ColumnNullable::compress() const size_t byte_size = nested_column->byteSize() + null_map->byteSize(); return ColumnCompressed::create(size(), byte_size, - [nested_column = std::move(nested_column), null_map = std::move(null_map)] + [nested_column = std::move(nested_compressed), null_map = std::move(null_map_compressed)] { return ColumnNullable::create(nested_column->decompress(), null_map->decompress()); }); diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index e9bdc3971c0..611c6fd7186 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -330,7 +330,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const } auto res_values = values->filter(values_filter, values_result_size_hint); - return this->create(std::move(res_values), std::move(res_offsets), res_offset); + return this->create(res_values, std::move(res_offsets), res_offset); } void ColumnSparse::expand(const Filter & mask, bool inverted) diff --git a/src/Common/ActionLock.cpp b/src/Common/ActionLock.cpp index f02ddc14183..1ae54ff9ff2 100644 --- a/src/Common/ActionLock.cpp +++ b/src/Common/ActionLock.cpp @@ -11,12 +11,12 @@ ActionLock::ActionLock(const ActionBlocker & blocker) : counter_ptr(blocker.coun ++(*counter); } -ActionLock::ActionLock(ActionLock && other) +ActionLock::ActionLock(ActionLock && other) noexcept { *this = std::move(other); } -ActionLock & ActionLock::operator=(ActionLock && other) +ActionLock & ActionLock::operator=(ActionLock && other) noexcept { auto lock_lhs = this->counter_ptr.lock(); diff --git a/src/Common/ActionLock.h b/src/Common/ActionLock.h index 1167a23b8dd..aeede564e11 100644 --- a/src/Common/ActionLock.h +++ b/src/Common/ActionLock.h @@ -19,8 +19,8 @@ public: explicit ActionLock(const ActionBlocker & blocker); - ActionLock(ActionLock && other); - ActionLock & operator=(ActionLock && other); + ActionLock(ActionLock && other) noexcept; + ActionLock & operator=(ActionLock && other) noexcept; ActionLock(const ActionLock & other) = delete; ActionLock & operator=(const ActionLock & other) = delete; diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 4e422f7482e..6d7ef9cd34d 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -100,6 +100,11 @@ public: return emplace(x); } + [[nodiscard]] bool push(T && x) + { + return emplace(std::move(x)); + } + /// Returns false if queue is finished template [[nodiscard]] bool emplace(Args &&... args) @@ -120,6 +125,11 @@ public: return emplaceImpl(milliseconds, x); } + [[nodiscard]] bool tryPush(T && x, UInt64 milliseconds = 0) + { + return emplaceImpl(milliseconds, std::move(x)); + } + /// Returns false if queue is finished or object was not emplaced during timeout template [[nodiscard]] bool tryEmplace(UInt64 milliseconds, Args &&... 
args) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index b86e8ed3e40..1070430b842 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -664,6 +664,10 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, new_path.erase(0, main_config_path.size()); std::replace(new_path.begin(), new_path.end(), '/', '_'); + /// If we have config file in YAML format, the preprocessed config will inherit .yaml extension + /// but will contain config in XML format, so some tools like clickhouse extract-from-config won't work + new_path = fs::path(new_path).replace_extension(".xml").string(); + if (preprocessed_dir.empty()) { if (!loaded_config.configuration->has("path")) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index c178dc58854..5ca37448e36 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1001,8 +1001,12 @@ public: inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const { - if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) return LUTIndex(0); + + if (unlikely(year > DATE_LUT_MAX_YEAR)) + return LUTIndex(DATE_LUT_SIZE - 1); + auto year_lut_index = (year - DATE_LUT_MIN_YEAR) * 12 + month - 1; UInt32 index = years_months_lut[year_lut_index].toUnderType() + day_of_month - 1; /// When date is out of range, default value is DATE_LUT_SIZE - 1 (2283-11-11) @@ -1012,7 +1016,7 @@ public: /// Create DayNum from year, month, day of month. inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const { - if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) return ExtendedDayNum(default_error_day_num); return toDayNum(makeLUTIndex(year, month, day_of_month)); diff --git a/src/Common/Epoll.cpp b/src/Common/Epoll.cpp index c86af5e49e3..4c335afcde1 100644 --- a/src/Common/Epoll.cpp +++ b/src/Common/Epoll.cpp @@ -21,12 +21,12 @@ Epoll::Epoll() : events_count(0) throwFromErrno("Cannot open epoll descriptor", DB::ErrorCodes::EPOLL_ERROR); } -Epoll::Epoll(Epoll && other) : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) +Epoll::Epoll(Epoll && other) noexcept : epoll_fd(other.epoll_fd), events_count(other.events_count.load()) { other.epoll_fd = -1; } -Epoll & Epoll::operator=(Epoll && other) +Epoll & Epoll::operator=(Epoll && other) noexcept { epoll_fd = other.epoll_fd; other.epoll_fd = -1; diff --git a/src/Common/Epoll.h b/src/Common/Epoll.h index 032f4045033..9c75974791f 100644 --- a/src/Common/Epoll.h +++ b/src/Common/Epoll.h @@ -19,8 +19,8 @@ public: Epoll(const Epoll &) = delete; Epoll & operator=(const Epoll &) = delete; - Epoll & operator=(Epoll && other); - Epoll(Epoll && other); + Epoll & operator=(Epoll && other) noexcept; + Epoll(Epoll && other) noexcept; /// Add new file descriptor to epoll. If ptr set to nullptr, epoll_event.data.fd = fd, /// otherwise epoll_event.data.ptr = ptr. 
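`Epoll` above, like `AccessRights`, `RolesOrUsersSet`, `SettingsConstraints`, `ActionLock` and `TimerDescriptor` elsewhere in this change, now declares its move constructor and move assignment `noexcept`. Standard containers use `std::move_if_noexcept` when they reallocate, so a `std::vector` of such objects copies them on growth unless the move constructor is known not to throw (move-only types are moved regardless, but the strong exception guarantee is lost). A generic sketch of the same idiom for a descriptor-owning wrapper; the class is illustrative and not taken from the ClickHouse sources:

```cpp
#include <unistd.h>
#include <utility>
#include <vector>

// Move-only wrapper around a file descriptor: moving transfers ownership and
// leaves the source holding -1, and both move operations are noexcept so that
// std::move_if_noexcept really moves during vector reallocation.
class FileDescriptor
{
public:
    explicit FileDescriptor(int fd_ = -1) : fd(fd_) {}

    FileDescriptor(const FileDescriptor &) = delete;
    FileDescriptor & operator=(const FileDescriptor &) = delete;

    FileDescriptor(FileDescriptor && other) noexcept : fd(std::exchange(other.fd, -1)) {}

    FileDescriptor & operator=(FileDescriptor && other) noexcept
    {
        if (this != &other)
        {
            reset();
            fd = std::exchange(other.fd, -1);
        }
        return *this;
    }

    ~FileDescriptor() { reset(); }

    int get() const { return fd; }

private:
    void reset()
    {
        if (fd >= 0)
            ::close(fd);
        fd = -1;
    }

    int fd;
};

int main()
{
    std::vector<FileDescriptor> fds;
    fds.emplace_back(::dup(0));
    fds.emplace_back(::dup(1)); // growth moves the existing element instead of copying it
}
```

Most of the affected classes in this change simply add `noexcept` to move operations that already cannot throw, which is all the annotation asserts.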
diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index 220d7a2013b..c61d8d7f531 100644 --- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -1,7 +1,11 @@ -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) #include #include +#if defined(OS_FREEBSD) +#include +#include +#endif #include #include #include @@ -18,6 +22,8 @@ namespace DB { +#if defined(OS_LINUX) + namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; @@ -103,6 +109,53 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const return data; } +#endif + +#if defined(OS_FREEBSD) + +namespace ErrorCodes +{ + extern const int SYSTEM_ERROR; +} + +MemoryStatisticsOS::MemoryStatisticsOS() +{ + pagesize = static_cast(::getPageSize()); + self = ::getpid(); +} + +MemoryStatisticsOS::~MemoryStatisticsOS() +{ +} + +MemoryStatisticsOS::Data MemoryStatisticsOS::get() const +{ + Data data; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, self }; + struct kinfo_proc kp; + size_t len = sizeof(struct kinfo_proc); + + if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0)) + throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR); + + if (sizeof(struct kinfo_proc) != len) + throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel returns structure of {} bytes instead of expected {}", + len, sizeof(struct kinfo_proc)); + + if (sizeof(struct kinfo_proc) != kp.ki_structsize) + throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel structure size ({}) does not match expected ({}).", + kp.ki_structsize, sizeof(struct kinfo_proc)); + + data.virt = kp.ki_size; + data.resident = kp.ki_rssize * pagesize; + data.code = kp.ki_tsize * pagesize; + data.data_and_stack = (kp.ki_dsize + kp.ki_ssize) * pagesize; + + return data; +} + +#endif + } #endif diff --git a/src/Common/MemoryStatisticsOS.h b/src/Common/MemoryStatisticsOS.h index 0893e333007..43f3fdf2f24 100644 --- a/src/Common/MemoryStatisticsOS.h +++ b/src/Common/MemoryStatisticsOS.h @@ -1,6 +1,9 @@ #pragma once -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) #include +#if defined(OS_FREEBSD) +#include +#endif namespace DB @@ -23,7 +26,9 @@ public: { uint64_t virt; uint64_t resident; +#if defined(OS_LINUX) uint64_t shared; +#endif uint64_t code; uint64_t data_and_stack; }; @@ -35,7 +40,13 @@ public: Data get() const; private: +#if defined(OS_LINUX) int fd; +#endif +#if defined(OS_FREEBSD) + int pagesize; + pid_t self; +#endif }; } diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index 086d462eeb2..a7c74dab8be 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -27,7 +27,7 @@ TimerDescriptor::TimerDescriptor(int clockid, int flags) throwFromErrno("Cannot set O_NONBLOCK for timer_fd", ErrorCodes::CANNOT_FCNTL); } -TimerDescriptor::TimerDescriptor(TimerDescriptor && other) : timer_fd(other.timer_fd) +TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(other.timer_fd) { other.timer_fd = -1; } diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h index 8ca69344b53..5263c05edea 100644 --- a/src/Common/TimerDescriptor.h +++ b/src/Common/TimerDescriptor.h @@ -17,7 +17,7 @@ public: TimerDescriptor(const TimerDescriptor &) = delete; TimerDescriptor & operator=(const TimerDescriptor &) = delete; - TimerDescriptor(TimerDescriptor && other); + TimerDescriptor(TimerDescriptor && other) noexcept; TimerDescriptor & operator=(TimerDescriptor &&) = default; int 
getDescriptor() const { return timer_fd; } diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 4f0c5efe680..70fe33b3f6e 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -59,7 +59,7 @@ static void addRootPath(String & path, const String & root_path) throw Exception("Path cannot be empty", Error::ZBADARGUMENTS); if (path[0] != '/') - throw Exception("Path must begin with /, got " + path, Error::ZBADARGUMENTS); + throw Exception("Path must begin with /, got path '" + path + "'", Error::ZBADARGUMENTS); if (root_path.empty()) return; diff --git a/src/Common/examples/cow_compositions.cpp b/src/Common/examples/cow_compositions.cpp index 427fe9130e9..f013e751063 100644 --- a/src/Common/examples/cow_compositions.cpp +++ b/src/Common/examples/cow_compositions.cpp @@ -52,7 +52,7 @@ private: { std::cerr << "Mutating\n"; auto res = shallowMutate(); - res->wrapped = IColumn::mutate(std::move(wrapped)); + res->wrapped = IColumn::mutate(wrapped); return res; } diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 1220c50b409..fd4294fb6ef 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -79,8 +79,12 @@ FailuresCount countFailures(const ::testing::TestResult & test_result) TEST(DateLUTTest, makeDayNumTest) { const DateLUTImpl & lut = DateLUT::instance("UTC"); - EXPECT_EQ(0, lut.makeDayNum(2500, 12, 25)); EXPECT_EQ(0, lut.makeDayNum(1924, 12, 31)); + EXPECT_EQ(-1, lut.makeDayNum(1924, 12, 31, -1)); + EXPECT_EQ(-16436, lut.makeDayNum(1925, 1, 1)); + EXPECT_EQ(0, lut.makeDayNum(1970, 1, 1)); + EXPECT_EQ(114635, lut.makeDayNum(2283, 11, 11)); + EXPECT_EQ(114635, lut.makeDayNum(2500, 12, 25)); } diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index f0d234e5cc5..d90f6064292 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -316,7 +316,7 @@ CodecTestSequence operator+(CodecTestSequence && left, const CodecTestSequence & std::vector operator+(const std::vector & left, const std::vector & right) { - std::vector result(std::move(left)); + std::vector result(left); std::move(std::begin(right), std::end(right), std::back_inserter(result)); return result; diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index ab86bb7d44a..eb8a724ade9 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -732,7 +732,7 @@ void Changelog::cleanLogThread() while (!log_files_to_delete_queue.isFinishedAndEmpty()) { std::string path; - if (log_files_to_delete_queue.tryPop(path)) + if (log_files_to_delete_queue.pop(path)) { std::error_code ec; if (std::filesystem::remove(path, ec)) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 4c76d052f9b..feeec222c0f 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -202,7 +202,7 @@ void print(IFourLetterCommand::StringBuffer & buf, const String & key, uint64_t String MonitorCommand::run() { - KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); if (!keeper_info.has_leader) @@ -288,7 +288,7 @@ String ServerStatCommand::run() writeText('\n', buf); }; - KeeperConnectionStats stats = 
keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); @@ -314,7 +314,7 @@ String StatCommand::run() auto write = [&buf] (const String & key, const String & value) { buf << key << ": " << value << '\n'; }; - KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); diff --git a/src/Coordination/KeeperConnectionStats.cpp b/src/Coordination/KeeperConnectionStats.cpp index 1e9f2c051a7..b4edfe45159 100644 --- a/src/Coordination/KeeperConnectionStats.cpp +++ b/src/Coordination/KeeperConnectionStats.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB @@ -5,61 +6,58 @@ namespace DB uint64_t KeeperConnectionStats::getMinLatency() const { - return min_latency; + return min_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getMaxLatency() const { - return max_latency; + return max_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getAvgLatency() const { - if (count != 0) - return total_latency / count; + auto cnt = count.load(std::memory_order_relaxed); + if (cnt) + return total_latency.load(std::memory_order_relaxed) / cnt; return 0; } uint64_t KeeperConnectionStats::getLastLatency() const { - return last_latency; + return last_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getPacketsReceived() const { - return packets_received; + return packets_received.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getPacketsSent() const { - return packets_sent; + return packets_sent.load(std::memory_order_relaxed); } void KeeperConnectionStats::incrementPacketsReceived() { - packets_received++; + packets_received.fetch_add(1, std::memory_order_relaxed); } void KeeperConnectionStats::incrementPacketsSent() { - packets_sent++; + packets_sent.fetch_add(1, std::memory_order_relaxed); } void KeeperConnectionStats::updateLatency(uint64_t latency_ms) { - last_latency = latency_ms; - total_latency += (latency_ms); - count++; + last_latency.store(latency_ms, std::memory_order_relaxed); + total_latency.fetch_add(latency_ms, std::memory_order_relaxed); + count.fetch_add(1, std::memory_order_relaxed); - if (latency_ms < min_latency) - { - min_latency = latency_ms; - } + uint64_t prev_val = min_latency.load(std::memory_order_relaxed); + while (prev_val > latency_ms && !min_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {} - if (latency_ms > max_latency) - { - max_latency = latency_ms; - } + prev_val = max_latency.load(std::memory_order_relaxed); + while (prev_val < latency_ms && !max_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {} } void KeeperConnectionStats::reset() @@ -70,17 +68,17 @@ void KeeperConnectionStats::reset() void KeeperConnectionStats::resetLatency() { - total_latency = 0; - count = 0; - max_latency = 0; - min_latency = 0; - last_latency = 0; + total_latency.store(0, std::memory_order_relaxed); + count.store(0, std::memory_order_relaxed); + max_latency.store(0, std::memory_order_relaxed); + min_latency.store(0, std::memory_order_relaxed); + last_latency.store(0, std::memory_order_relaxed); } void 
KeeperConnectionStats::resetRequestCounters() { - packets_received = 0; - packets_sent = 0; + packets_received.store(0, std::memory_order_relaxed); + packets_sent.store(0, std::memory_order_relaxed); } } diff --git a/src/Coordination/KeeperConnectionStats.h b/src/Coordination/KeeperConnectionStats.h index 3cd881d553b..ff276def0c9 100644 --- a/src/Coordination/KeeperConnectionStats.h +++ b/src/Coordination/KeeperConnectionStats.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -11,7 +12,10 @@ namespace DB class KeeperConnectionStats { public: - KeeperConnectionStats() = default; + KeeperConnectionStats() + { + reset(); + } uint64_t getMinLatency() const; uint64_t getMaxLatency() const; @@ -33,20 +37,20 @@ private: void resetRequestCounters(); /// all response with watch response included - uint64_t packets_sent = 0; + std::atomic_uint64_t packets_sent; /// All user requests - uint64_t packets_received = 0; + std::atomic_uint64_t packets_received; /// For consistent with zookeeper measured by millisecond, /// otherwise maybe microsecond is better - uint64_t total_latency = 0; - uint64_t max_latency = 0; - uint64_t min_latency = 0; + std::atomic_uint64_t total_latency; + std::atomic_uint64_t max_latency; + std::atomic_uint64_t min_latency; /// last operation latency - uint64_t last_latency = 0; + std::atomic_uint64_t last_latency; - uint64_t count = 0; + std::atomic_uint64_t count; }; } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 86e22b834d4..a4dcb0acc52 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -594,7 +594,6 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.updateLatency(process_time_ms); } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 646fab5d80f..cc680f8d78f 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -68,7 +68,6 @@ private: /// RAFT wrapper. std::unique_ptr server; - mutable std::mutex keeper_stats_mutex; KeeperConnectionStats keeper_stats; KeeperConfigurationAndSettingsPtr configuration_and_settings; @@ -159,9 +158,8 @@ public: uint64_t getSnapDirSize() const; /// Request statistics such as qps, latency etc. 
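The KeeperConnectionStats change above drops the dispatcher-level mutex: every counter becomes a relaxed std::atomic, and the running minimum and maximum latencies are kept with a compare_exchange_weak loop so concurrent writers cannot lose an update. A minimal standalone sketch of that min/max pattern follows; the LatencyStats name and the main() driver are illustrative only, not part of the patch.

#include <atomic>
#include <cstdint>
#include <initializer_list>
#include <iostream>

/// Illustrative only: the same lock-free min/max technique as the patch, outside ClickHouse.
struct LatencyStats
{
    std::atomic<uint64_t> min_latency{UINT64_MAX};   /// sentinel so the first sample wins (in the sketch)
    std::atomic<uint64_t> max_latency{0};

    void update(uint64_t latency_ms)
    {
        /// compare_exchange_weak reloads 'prev' on failure, so each loop stops as soon as
        /// another thread has already stored an even smaller (resp. larger) value.
        uint64_t prev = min_latency.load(std::memory_order_relaxed);
        while (prev > latency_ms && !min_latency.compare_exchange_weak(prev, latency_ms, std::memory_order_relaxed))
        {
        }

        prev = max_latency.load(std::memory_order_relaxed);
        while (prev < latency_ms && !max_latency.compare_exchange_weak(prev, latency_ms, std::memory_order_relaxed))
        {
        }
    }
};

int main()
{
    LatencyStats stats;
    for (uint64_t v : {42, 7, 300})
        stats.update(v);
    std::cout << stats.min_latency.load() << ' ' << stats.max_latency.load() << '\n';   /// prints: 7 300
    return 0;
}

Relaxed ordering suffices here because the counters are monitoring data only and are not used to synchronize any other state.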
- KeeperConnectionStats getKeeperConnectionStats() const + KeeperConnectionStats & getKeeperConnectionStats() { - std::lock_guard lock(keeper_stats_mutex); return keeper_stats; } @@ -179,19 +177,16 @@ public: void incrementPacketsSent() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.incrementPacketsSent(); } void incrementPacketsReceived() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.incrementPacketsReceived(); } void resetConnectionStats() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.reset(); } }; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e3d99d4775b..35e56ba1e30 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -39,7 +39,12 @@ namespace request_for_session.request->xid = xid; request_for_session.request->readImpl(buffer); - readIntBinary(request_for_session.time, buffer); + if (!buffer.eof()) + readIntBinary(request_for_session.time, buffer); + else /// backward compatibility + request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + + return request_for_session; } } diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index c2d4274f972..52d0b0cc881 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -115,6 +115,15 @@ void KeeperStateManager::loadLogStore(uint64_t last_commited_index, uint64_t log log_store->init(last_commited_index, logs_to_keep); } +void KeeperStateManager::system_exit(const int /* exit_code */) +{ + /// NuRaft itself calls exit() which will call atexit handlers + /// and this may lead to an issues in multi-threaded program. + /// + /// Override this with abort(). + abort(); +} + ClusterConfigPtr KeeperStateManager::getLatestConfigFromLogStore() const { auto entry_with_change = log_store->getLatestConfigChange(); diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index fad76c89503..66037d78a63 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -73,7 +73,7 @@ public: nuraft::ptr get_srv_config() const { return configuration_wrapper.config; } - void system_exit(const int /* exit_code */) override {} + void system_exit(const int exit_code) override; int getPort() const { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 1e6081a628f..e13b43d056a 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -317,7 +317,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr created_node.data = request.data; created_node.is_sequental = request.is_sequential; - auto [map_key, _] = container.insert(path_created, std::move(created_node)); + auto [map_key, _] = container.insert(path_created, created_node); /// Take child path from key owned by map. 
auto child_path = getBaseName(map_key->getKey()); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 26c883b308d..f7d6761124f 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -122,6 +122,11 @@ Block::Block(const ColumnsWithTypeAndName & data_) : data{data_} initializeIndexByName(); } +Block::Block(ColumnsWithTypeAndName && data_) : data{std::move(data_)} +{ + initializeIndexByName(); +} + void Block::initializeIndexByName() { diff --git a/src/Core/Block.h b/src/Core/Block.h index 2624b57880c..2a1e6113b73 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -37,6 +37,7 @@ public: Block() = default; Block(std::initializer_list il); Block(const ColumnsWithTypeAndName & data_); + Block(ColumnsWithTypeAndName && data_); /// insert the column at the specified position void insert(size_t position, ColumnWithTypeAndName elem); diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 26535f05be7..98797b3d284 100644 --- a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -24,12 +24,12 @@ namespace ErrorCodes } MySQLClient::MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_) - : host(host_), port(port_), user(user_), password(std::move(password_)), + : host(host_), port(port_), user(user_), password(password_), client_capabilities(CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION) { } -MySQLClient::MySQLClient(MySQLClient && other) +MySQLClient::MySQLClient(MySQLClient && other) noexcept : host(std::move(other.host)), port(other.port), user(std::move(other.user)), password(std::move(other.password)) , client_capabilities(other.client_capabilities) { @@ -142,7 +142,7 @@ void MySQLClient::setBinlogChecksum(const String & binlog_checksum) replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 0 : 4); } -void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum) +void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, std::unordered_set replicate_tables, String gtid_str, const String & binlog_checksum) { /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments: /// Make a notice to the server that this client is checksum-aware. @@ -165,6 +165,7 @@ void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, Stri /// Set Filter rule to replication. replication.setReplicateDatabase(replicate_db); + replication.setReplicateTables(replicate_tables); BinlogDumpGTID binlog_dump(slave_id, gtid_sets.toPayload()); packet_endpoint->sendPacket(binlog_dump, true); diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index 5b33a8f852b..9fa3ace6baa 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -22,7 +22,7 @@ class MySQLClient { public: MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_); - MySQLClient(MySQLClient && other); + MySQLClient(MySQLClient && other) noexcept; void connect(); void disconnect(); @@ -33,7 +33,7 @@ public: /// Start replication stream by GTID. /// replicate_db: replication database schema, events from other databases will be ignored. /// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'. 
- void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum); + void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, std::unordered_set replicate_tables, String gtid, const String & binlog_checksum); BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0); Position getPosition() const { return replication.getPosition(); } diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 50f6be23f83..49cc201955a 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -142,8 +142,7 @@ namespace MySQLReplication out << "XID: " << this->xid << '\n'; } - /// https://dev.mysql.com/doc/internals/en/table-map-event.html - void TableMapEvent::parseImpl(ReadBuffer & payload) + void TableMapEventHeader::parse(ReadBuffer & payload) { payload.readStrict(reinterpret_cast(&table_id), 6); payload.readStrict(reinterpret_cast(&flags), 2); @@ -157,7 +156,11 @@ namespace MySQLReplication table.resize(table_len); payload.readStrict(reinterpret_cast(table.data()), table_len); payload.ignore(1); + } + /// https://dev.mysql.com/doc/internals/en/table-map-event.html + void TableMapEvent::parseImpl(ReadBuffer & payload) + { column_count = readLengthEncodedNumber(payload); for (auto i = 0U; i < column_count; ++i) { @@ -165,7 +168,6 @@ namespace MySQLReplication payload.readStrict(reinterpret_cast(&v), 1); column_type.emplace_back(v); } - String meta; readLengthEncodedString(meta, payload); parseMeta(meta); @@ -957,10 +959,20 @@ namespace MySQLReplication } case TABLE_MAP_EVENT: { - event = std::make_shared(std::move(event_header)); - event->parseEvent(event_payload); - auto table_map = std::static_pointer_cast(event); - table_maps[table_map->table_id] = table_map; + TableMapEventHeader map_event_header; + map_event_header.parse(event_payload); + if (doReplicate(map_event_header.schema, map_event_header.table)) + { + event = std::make_shared(std::move(event_header), map_event_header); + event->parseEvent(event_payload); + auto table_map = std::static_pointer_cast(event); + table_maps[table_map->table_id] = table_map; + } + else + { + event = std::make_shared(std::move(event_header)); + event->parseEvent(event_payload); + } break; } case WRITE_ROWS_EVENT_V1: @@ -1030,8 +1042,21 @@ namespace MySQLReplication // Special "dummy event" return false; } - auto table_map = table_maps.at(table_id); - return table_map->schema == replicate_do_db; + if (table_maps.contains(table_id)) + { + auto table_map = table_maps.at(table_id); + return (table_map->schema == replicate_do_db) && (replicate_tables.empty() || replicate_tables.contains(table_map->table)); + } + return false; + } + + bool MySQLFlavor::doReplicate(const String & db, const String & table_name) + { + if (replicate_do_db.empty()) + return false; + if (replicate_do_db != db) + return false; + return replicate_tables.empty() || table_name.empty() || replicate_tables.contains(table_name); } } diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index cb67ce73de9..8900eee0102 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -409,6 +409,20 @@ namespace MySQLReplication void parseImpl(ReadBuffer & payload) override; }; + class TableMapEventHeader + { + public: + UInt64 table_id; + UInt16 flags; + UInt8 schema_len; + String schema; + UInt8 table_len; + String table; + + TableMapEventHeader(): table_id(0), flags(0), schema_len(0), table_len(0) {} + void parse(ReadBuffer & payload); 
+ }; + class TableMapEvent : public EventBase { public: @@ -423,7 +437,15 @@ namespace MySQLReplication std::vector column_meta; Bitmap null_bitmap; - TableMapEvent(EventHeader && header_) : EventBase(std::move(header_)), table_id(0), flags(0), schema_len(0), table_len(0), column_count(0) {} + TableMapEvent(EventHeader && header_, const TableMapEventHeader & map_event_header) : EventBase(std::move(header_)), column_count(0) + { + table_id = map_event_header.table_id; + flags = map_event_header.flags; + schema_len = map_event_header.schema_len; + schema = map_event_header.schema; + table_len = map_event_header.table_len; + table = map_event_header.table; + } void dump(WriteBuffer & out) const override; protected: @@ -563,6 +585,7 @@ namespace MySQLReplication Position getPosition() const override { return position; } BinlogEventPtr readOneEvent() override { return event; } void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); } + void setReplicateTables(std::unordered_set tables) { replicate_tables = std::move(tables); } void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); } void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; } @@ -570,10 +593,13 @@ namespace MySQLReplication Position position; BinlogEventPtr event; String replicate_do_db; + // only for filter data(Row Event), not include DDL Event + std::unordered_set replicate_tables; std::map > table_maps; size_t checksum_signature_length = 4; bool doReplicate(UInt64 table_id); + bool doReplicate(const String & db, const String & table_name); }; } diff --git a/src/Core/MySQL/PacketsConnection.cpp b/src/Core/MySQL/PacketsConnection.cpp index 32a8a9cf8ab..a2eaa0ba7ba 100644 --- a/src/Core/MySQL/PacketsConnection.cpp +++ b/src/Core/MySQL/PacketsConnection.cpp @@ -99,8 +99,8 @@ HandshakeResponse::HandshakeResponse() : capability_flags(0x00), max_packet_size HandshakeResponse::HandshakeResponse( UInt32 capability_flags_, UInt32 max_packet_size_, UInt8 character_set_, const String & username_, const String & database_, const String & auth_response_, const String & auth_plugin_name_) - : capability_flags(capability_flags_), max_packet_size(max_packet_size_), character_set(character_set_), username(std::move(username_)), - database(std::move(database_)), auth_response(std::move(auth_response_)), auth_plugin_name(std::move(auth_plugin_name_)) + : capability_flags(capability_flags_), max_packet_size(max_packet_size_), character_set(character_set_), username(username_), + database(database_), auth_response(auth_response_), auth_plugin_name(auth_plugin_name_) { } diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 93f44b02ce3..6ee491f3ab5 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -8,7 +8,6 @@ #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406 -#define DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA 54415 /// Minimum revision with exactly the same set of aggregation methods and rules to select them. /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). 
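The MySQLReplication changes above add per-table filtering: a TABLE_MAP_EVENT is only materialized into a TableMapEvent when doReplicate(schema, table) accepts it, and an empty replicate_tables set keeps the old behaviour of replicating every table of the configured database. A small self-contained sketch of that predicate, assuming a plain std::unordered_set allow-list (the shouldReplicate name is illustrative, not the real method):

#include <iostream>
#include <string>
#include <unordered_set>

/// Illustrative predicate mirroring the filtering rule: match the database first,
/// then treat an empty table list (or an event without a table name) as "allow all".
static bool shouldReplicate(
    const std::string & replicate_do_db,
    const std::unordered_set<std::string> & replicate_tables,
    const std::string & db,
    const std::string & table)
{
    if (replicate_do_db.empty() || db != replicate_do_db)
        return false;
    return replicate_tables.empty() || table.empty() || replicate_tables.contains(table);
}

int main()
{
    const std::unordered_set<std::string> tables{"t1", "t2"};
    std::cout << shouldReplicate("db", tables, "db", "t1") << '\n';    /// 1: table is on the list
    std::cout << shouldReplicate("db", tables, "db", "t3") << '\n';    /// 0: filtered out
    std::cout << shouldReplicate("db", {}, "db", "whatever") << '\n';  /// 1: empty list means "all tables"
    std::cout << shouldReplicate("db", tables, "other", "t1") << '\n'; /// 0: wrong database
    return 0;
}

Row events go through this list directly; DDL statements are handled separately further down, where MaterializedMySQLSyncThread parses the table name out of the query (tryExtractTableNameFromDDL) before deciding whether to execute it.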
diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 87d7eee0daa..411e73bdf1a 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -89,6 +89,14 @@ void Settings::addProgramOptions(boost::program_options::options_description & o } } +void Settings::addProgramOptionsAsMultitokens(boost::program_options::options_description & options) +{ + for (const auto & field : all()) + { + addProgramOptionAsMultitoken(options, field); + } +} + void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field) { const std::string_view name = field.getName(); @@ -97,6 +105,14 @@ void Settings::addProgramOption(boost::program_options::options_description & op name.data(), boost::program_options::value()->composing()->notifier(on_program_option), field.getDescription()))); } +void Settings::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field) +{ + const std::string_view name = field.getName(); + auto on_program_option = boost::function1([this, name](const Strings & values) { set(name, values.back()); }); + options.add(boost::shared_ptr(new boost::program_options::option_description( + name.data(), boost::program_options::value()->multitoken()->composing()->notifier(on_program_option), field.getDescription()))); +} + void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path) { if (config.getBool("skip_check_for_incorrect_settings", false)) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8171333a426..18e75224e7a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -537,7 +537,7 @@ class IColumn; M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \ M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ - M(UInt64, remote_read_min_bytes_for_seek, DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead for read with ignore.", 0) \ + M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead for read with ignore.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. 
If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ @@ -558,6 +558,8 @@ class IColumn; M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \ M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \ \ + M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (w/o condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -621,7 +623,7 @@ class IColumn; M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ \ - M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ + M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ \ M(String, bool_true_representation, "true", "Text to represent bool value in TSV/CSV formats.", 0) \ @@ -721,6 +723,11 @@ struct Settings : public BaseSettings, public IHints<2, Settings /// (Don't forget to call notify() on the `variables_map` after parsing it!) void addProgramOptions(boost::program_options::options_description & options); + /// Adds program options as to set the settings from a command line. + /// Allows to set one setting multiple times, the last value will be used. + /// (Don't forget to call notify() on the `variables_map` after parsing it!) + void addProgramOptionsAsMultitokens(boost::program_options::options_description & options); + /// Check that there is no user-level settings at the top level in config. /// This is a common source of mistake (user don't know where to write user-level setting). 
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); @@ -728,6 +735,8 @@ struct Settings : public BaseSettings, public IHints<2, Settings std::vector getAllRegisteredNames() const override; void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); + + void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field); }; /* diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 17d24946cd8..1b0f6c96954 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -64,7 +64,8 @@ IMPLEMENT_SETTING_ENUM(DistributedProductMode, ErrorCodes::UNKNOWN_DISTRIBUTED_P IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, {{"basic", FormatSettings::DateTimeInputFormat::Basic}, - {"best_effort", FormatSettings::DateTimeInputFormat::BestEffort}}) + {"best_effort", FormatSettings::DateTimeInputFormat::BestEffort}, + {"best_effort_us", FormatSettings::DateTimeInputFormat::BestEffortUS}}) IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Core/examples/coro.cpp b/src/Core/examples/coro.cpp index 0f152d8090a..ecff0e23d11 100644 --- a/src/Core/examples/coro.cpp +++ b/src/Core/examples/coro.cpp @@ -84,7 +84,7 @@ struct Task std::cout << " Task " << tag << std::endl; } Task(Task &) = delete; - Task(Task &&rhs) : my(rhs.my), tag(rhs.tag) + Task(Task &&rhs) noexcept : my(rhs.my), tag(rhs.tag) { rhs.my = {}; std::cout << " Task&& " << tag << std::endl; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index b4269fb0f8c..fd56c1baebd 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -27,6 +27,9 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti case FormatSettings::DateTimeInputFormat::BestEffort: parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); return; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + parseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone); + return; } } diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index b9ed5bd4a02..78c7ea56529 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -69,6 +69,9 @@ static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, con case FormatSettings::DateTimeInputFormat::BestEffort: parseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone); return; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + parseDateTime64BestEffortUS(x, scale, istr, time_zone, utc_time_zone); + return; } } diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp index 22df95fc8f7..a0dc20b6479 100644 --- a/src/DataTypes/Serializations/SerializationInfo.cpp +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -181,10 +181,10 @@ void SerializationInfoByName::writeJSON(WriteBuffer & out) const { auto info_json = info->toJSON(); info_json.set(KEY_NAME, name); - column_infos.add(std::move(info_json)); + column_infos.add(std::move(info_json)); /// NOLINT } - object.set(KEY_COLUMNS, std::move(column_infos)); + object.set(KEY_COLUMNS, 
std::move(column_infos)); /// NOLINT std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp index 378bed2af53..803302f9642 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -89,7 +89,7 @@ Poco::JSON::Object SerializationInfoTuple::toJSON() const for (const auto & elem : elems) subcolumns.add(elem->toJSON()); - object.set("subcolumns", std::move(subcolumns)); + object.set("subcolumns", subcolumns); return object; } diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 324f4808b1f..adfcd83f5a7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -37,7 +37,7 @@ public: }; DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_) - : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_) + : DatabaseOrdinary(name_, metadata_path_, "store/", logger_name, context_) , path_to_table_symlinks(fs::path(getContext()->getPath()) / "data" / escapeForFileName(name_) / "") , path_to_metadata_symlink(fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(name_)) , db_uuid(uuid) diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index cc6d808a564..5f4027a26b3 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -61,7 +61,7 @@ DatabaseMySQL::DatabaseMySQL( , database_engine_define(database_engine_define_->clone()) , database_name_in_mysql(database_name_in_mysql_) , database_settings(std::move(settings_)) - , mysql_pool(std::move(pool)) + , mysql_pool(std::move(pool)) /// NOLINT { try { diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 0facdfc20be..580eb41b449 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -30,11 +30,15 @@ namespace ErrorCodes static std::unordered_map fetchTablesCreateQuery( const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name, - const std::vector & fetch_tables, const Settings & global_settings) + const std::vector & fetch_tables, std::unordered_set & materialized_tables_list, + const Settings & global_settings) { std::unordered_map tables_create_query; for (const auto & fetch_table_name : fetch_tables) { + if (!materialized_tables_list.empty() && !materialized_tables_list.contains(fetch_table_name)) + continue; + Block show_create_table_header{ {std::make_shared(), "Table"}, {std::make_shared(), "Create Table"}, @@ -253,7 +257,7 @@ void MaterializeMetadata::transaction(const MySQLReplication::Position & positio out.close(); } - commitMetadata(std::move(fun), persistent_tmp_path, persistent_path); + commitMetadata(fun, persistent_tmp_path, persistent_path); } MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & settings_) : persistent_path(path_), settings(settings_) @@ -276,7 +280,8 @@ MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & void MaterializeMetadata::startReplication( mysqlxx::PoolWithFailover::Entry & connection, const String & database, - bool & opened_transaction, std::unordered_map & need_dumping_tables) + bool & opened_transaction, std::unordered_map & 
need_dumping_tables, + std::unordered_set & materialized_tables_list) { checkSyncUserPriv(connection, settings); @@ -297,7 +302,7 @@ void MaterializeMetadata::startReplication( connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute(); opened_transaction = true; - need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), settings); + need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), materialized_tables_list, settings); connection->query("UNLOCK TABLES;").execute(); } catch (...) diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index bcb0465b61e..b828c901fbb 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -48,7 +48,8 @@ struct MaterializeMetadata mysqlxx::PoolWithFailover::Entry & connection, const String & database, bool & opened_transaction, - std::unordered_map & need_dumping_tables); + std::unordered_map & need_dumping_tables, + std::unordered_set & materialized_tables_list); MaterializeMetadata(const String & path_, const Settings & settings_); }; diff --git a/src/Databases/MySQL/MaterializedMySQLSettings.h b/src/Databases/MySQL/MaterializedMySQLSettings.h index d5acdc81602..43235d502c3 100644 --- a/src/Databases/MySQL/MaterializedMySQLSettings.h +++ b/src/Databases/MySQL/MaterializedMySQLSettings.h @@ -16,6 +16,7 @@ class ASTStorage; M(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). when this time is exceeded, the data will be materialized", 0) \ M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \ M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \ + M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. 
Default value: empty list — means whole tables will be replicated.", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 8033d65c549..230b158b231 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -25,6 +25,10 @@ #include #include #include +#include +#include +#include +#include namespace DB { @@ -148,6 +152,61 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S } } +static std::tuple tryExtractTableNameFromDDL(const String & ddl) +{ + String table_name; + String database_name; + if (ddl.empty()) return std::make_tuple(database_name, table_name); + + bool parse_failed = false; + Tokens tokens(ddl.data(), ddl.data() + ddl.size()); + IParser::Pos pos(tokens, 0); + Expected expected; + ASTPtr res; + ASTPtr table; + if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected)) + { + ParserKeyword("IF NOT EXISTS").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("ALTER TABLE").ignore(pos, expected)) + { + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected)) + { + ParserKeyword("IF EXISTS").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("TRUNCATE").ignore(pos, expected)) + { + ParserKeyword("TABLE").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("RENAME TABLE").ignore(pos, expected)) + { + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else + { + parse_failed = true; + } + if (!parse_failed) + { + if (auto table_id = table->as()->getTableId()) + { + database_name = table_id.database_name; + table_name = table_id.table_name; + } + } + return std::make_tuple(database_name, table_name); +} + MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( ContextPtr context_, const String & database_name_, @@ -159,11 +218,22 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( , log(&Poco::Logger::get("MaterializedMySQLSyncThread")) , database_name(database_name_) , mysql_database_name(mysql_database_name_) - , pool(std::move(pool_)) + , pool(std::move(pool_)) /// NOLINT , client(std::move(client_)) , settings(settings_) { query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") "; + + if (!settings->materialized_mysql_tables_list.value.empty()) + { + Names tables_list; + boost::split(tables_list, settings->materialized_mysql_tables_list.value, [](char c){ return c == ','; }); + for (String & table_name: tables_list) + { + boost::trim(table_name); + materialized_tables_list.insert(table_name); + } + } } void MaterializedMySQLSyncThread::synchronization() @@ -434,7 +504,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta checkMySQLVariables(connection, getContext()->getSettingsRef()); std::unordered_map need_dumping_tables; - metadata.startReplication(connection, 
mysql_database_name, opened_transaction, need_dumping_tables); + metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables, materialized_tables_list); if (!need_dumping_tables.empty()) { @@ -464,7 +534,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta connection->query("COMMIT").execute(); client.connect(); - client.startBinlogDumpGTID(randomNumber(), mysql_database_name, metadata.executed_gtid_set, metadata.binlog_checksum); + client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum); setSynchronizationThreadException(nullptr); return true; @@ -792,9 +862,24 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even auto query_context = createQueryContext(getContext()); CurrentThread::QueryScope query_scope(query_context); + String query = query_event.query; + if (!materialized_tables_list.empty()) + { + auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query); + + if (!ddl_table_name.empty()) + { + ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name; + if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name)) + { + LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query); + return; + } + } + } String comment = "Materialize MySQL step 2: execute MySQL DDL for sync data"; String event_database = query_event.schema == mysql_database_name ? database_name : ""; - tryToExecuteQuery(query_prefix + query_event.query, query_context, event_database, comment); + tryToExecuteQuery(query_prefix + query, query_context, event_database, comment); } catch (Exception & exception) { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index ba5022137bf..c7781595a85 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -63,6 +63,7 @@ private: mutable MySQLClient client; MaterializedMySQLSettings * settings; String query_prefix; + NameSet materialized_tables_list; // USE MySQL ERROR CODE: // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index cad3e3b8799..8b8d0a57cc7 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -494,7 +494,7 @@ Pipe CacheDictionary::read(const Names & column_names, size { auto keys = cache_storage_ptr->getCachedSimpleKeys(); auto keys_column = getColumnFromPODArray(std::move(keys)); - key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + key_columns = {ColumnWithTypeAndName(keys_column, std::make_shared(), dict_struct.id->name)}; } else { diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index cd87cf831a2..fcad8398c0b 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -52,7 +52,7 @@ Block blockForKeys( auto filtered_column = source_column->filter(filter, requested_rows.size()); - block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, (*dict_struct.key)[i].name}); + block.insert({filtered_column, (*dict_struct.key)[i].type, (*dict_struct.key)[i].name}); } return block; diff --git a/src/Dictionaries/DictionaryStructure.cpp 
b/src/Dictionaries/DictionaryStructure.cpp index 3e29f3efe76..012750bde60 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -33,8 +33,8 @@ namespace DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type) { - const auto name = config.getString(config_prefix + ".name", ""); - const auto expression = config.getString(config_prefix + ".expression", ""); + auto name = config.getString(config_prefix + ".name", ""); + auto expression = config.getString(config_prefix + ".expression", ""); if (name.empty() && !expression.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Element {}.name is empty"); diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 0c82da7b73b..26144821a0e 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -147,7 +147,7 @@ ColumnPtr FlatDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (attribute.is_nullable_set) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } @@ -572,7 +572,7 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, siz keys.push_back(key_index); auto keys_column = getColumnFromPODArray(std::move(keys)); - ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(keys_column, std::make_shared(), dict_struct.id->name)}; std::shared_ptr dictionary = shared_from_this(); auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index ea041c63d73..65d9b3e7d42 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -578,7 +578,7 @@ ColumnPtr HashedArrayDictionary::getAttributeColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index b70f018df6b..178631d9c53 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -159,7 +159,7 @@ ColumnPtr HashedDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index deec1e6a588..1a4e01d4aa3 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -61,7 +61,7 @@ void IPolygonDictionary::convertKeyColumns(Columns & key_columns, DataTypes & ke auto & key_column_to_cast = key_columns[key_type_index]; ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), float_64_type); + 
auto casted_column = castColumnAccurate(column_to_cast, float_64_type); key_column_to_cast = std::move(casted_column); key_type = float_64_type; } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 5330bc684c3..e82fcd580e2 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -198,7 +198,7 @@ ColumnPtr RangeHashedDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } @@ -298,7 +298,7 @@ ColumnPtr RangeHashedDictionary::getColumnInternal( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index a1b406b3424..fd381ab2921 100644 --- a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -136,9 +136,9 @@ namespace DB RedisArray keys; auto key_type = storageTypeToKeyType(configuration.storage_type); - for (const auto & key : all_keys) + for (auto && key : all_keys) if (key_type == connection->client->execute(RedisCommand("TYPE").addRedisType(key))) - keys.addRedisType(std::move(key)); + keys.addRedisType(key); if (configuration.storage_type == RedisStorageType::HASH_MAP) { @@ -165,10 +165,10 @@ namespace DB } if (primary_with_secondary.size() > 1) - hkeys.add(std::move(primary_with_secondary)); + hkeys.add(primary_with_secondary); } - keys = std::move(hkeys); + keys = hkeys; } return Pipe(std::make_shared( diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index adbe4084d81..9b1a4ed1e6d 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -761,9 +761,9 @@ private: FileDescriptor() = default; - FileDescriptor(FileDescriptor && rhs) : fd(rhs.fd) { rhs.fd = -1; } + FileDescriptor(FileDescriptor && rhs) noexcept : fd(rhs.fd) { rhs.fd = -1; } - FileDescriptor & operator=(FileDescriptor && rhs) + FileDescriptor & operator=(FileDescriptor && rhs) noexcept { if (this == &rhs) return *this; diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 5d5eb89691e..7cacab98af5 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -144,6 +144,12 @@ DiskCacheWrapper::readFile( } } + /// Do not use RemoteFSReadMethod::threadpool for index and mark files. + /// Here it does not make sense since the files are small. + /// Note: enabling `threadpool` read requires to call setReadUntilEnd(). 
+ auto current_read_settings = settings; + current_read_settings.remote_fs_method = RemoteFSReadMethod::read; + if (metadata->status == DOWNLOADING) { FileDownloadStatus result_status = DOWNLOADED; @@ -158,7 +164,7 @@ DiskCacheWrapper::readFile( auto tmp_path = path + ".tmp"; { - auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size); + auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite); copyData(*src_buffer, *dst_buffer); } @@ -184,7 +190,7 @@ DiskCacheWrapper::readFile( if (metadata->status == DOWNLOADED) return cache_disk->readFile(path, settings, read_hint, file_size); - return DiskDecorator::readFile(path, settings, read_hint, file_size); + return DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); } std::unique_ptr diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt index 6e6aa6d4553..44883c271f4 100644 --- a/src/Formats/CMakeLists.txt +++ b/src/Formats/CMakeLists.txt @@ -1,21 +1,2 @@ -if (TARGET ch_contrib::avrocpp) - set(USE_AVRO 1) -endif() -if (TARGET ch_contrib::parquet) - set(USE_PARQUET 1) - set(USE_ARROW 1) - set(USE_ORC 1) -endif() -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY 1) -endif() -if (TARGET ch_contrib::protobuf) - set(USE_PROTOBUF 1) -endif() -if (TARGET ch_contrib::msgpack) - set(USE_MSGPACK 1) -endif() -if (TARGET ch_contrib::capnp) - set(USE_CAPNP 1) -endif() +include(configure_config.cmake) configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 0a7747fc864..b0ea10abdb6 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -262,7 +262,7 @@ static bool evaluateConstantExpressionFromString(const StringRef & field, DataTy /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats. bool parsed = parser.parse(token_iterator, ast, expected); - if (!parsed) + if (!parsed || !token_iterator->isEnd()) return false; try diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index be565a532bb..3fea8d3eb7b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -278,9 +278,10 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( if (settings.output_format_parallel_formatting && getCreators(name).supports_parallel_formatting && !settings.output_format_json_array_of_rows) { - auto formatter_creator = [output_getter, sample, callback, format_settings] - (WriteBuffer & output) -> OutputFormatPtr - { return output_getter(output, sample, {std::move(callback)}, format_settings);}; + auto formatter_creator = [output_getter, sample, callback, format_settings] (WriteBuffer & output) -> OutputFormatPtr + { + return output_getter(output, sample, {callback}, format_settings); + }; ParallelFormattingOutputFormat::Params builder{buf, sample, formatter_creator, settings.max_threads}; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 265c879e768..751b3c51fa8 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -38,8 +38,9 @@ struct FormatSettings enum class DateTimeInputFormat { - Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. 
- BestEffort /// Use sophisticated rules to parse whatever possible. + Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. + BestEffort, /// Use sophisticated rules to parse whatever possible. + BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy }; DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic; diff --git a/src/Formats/configure_config.cmake b/src/Formats/configure_config.cmake new file mode 100644 index 00000000000..3a11f3c6448 --- /dev/null +++ b/src/Formats/configure_config.cmake @@ -0,0 +1,20 @@ +if (TARGET ch_contrib::avrocpp) + set(USE_AVRO 1) +endif() +if (TARGET ch_contrib::parquet) + set(USE_PARQUET 1) + set(USE_ARROW 1) + set(USE_ORC 1) +endif() +if (TARGET ch_contrib::snappy) + set(USE_SNAPPY 1) +endif() +if (TARGET ch_contrib::protobuf) + set(USE_PROTOBUF 1) +endif() +if (TARGET ch_contrib::msgpack) + set(USE_MSGPACK 1) +endif() +if (TARGET ch_contrib::capnp) + set(USE_CAPNP 1) +endif() diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 909803d7cd7..f75d67032f2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -906,6 +906,41 @@ struct ConvertImplGenericToString } }; +/** Conversion of time_t to UInt16, Int32, UInt32 + */ +template +void convertFromTime(typename DataType::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFF)) + x = 0xFFFF; + else + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFFFFFF)) + x = 0xFFFFFFFF; + else + x = time; +} /** Conversion of strings to numbers, dates, datetimes: through parsing. */ @@ -931,18 +966,16 @@ inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer x = tmp; } + // NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. 
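The convertFromTime specializations above replace the old "negative becomes zero" handling with a saturating conversion: values outside the target range are clamped to 0 or to the type's maximum (0xFFFF for Date day numbers, 0xFFFFFFFF for DateTime timestamps) instead of wrapping. A minimal sketch of the same clamping, assuming a 64-bit time_t; the clampTime template is illustrative, not the patch's API.

#include <cstdint>
#include <ctime>
#include <iostream>

/// Illustrative saturating conversion: clamp instead of letting the value wrap around
/// when it is narrowed to the storage type of Date (UInt16) or DateTime (UInt32).
template <typename T, uint64_t Max>
T clampTime(time_t time)
{
    if (time < 0)
        return 0;
    if (static_cast<uint64_t>(time) > Max)
        return static_cast<T>(Max);
    return static_cast<T>(time);
}

int main()
{
    std::cout << clampTime<uint16_t, 0xFFFF>(-5) << '\n';                                    /// 0
    std::cout << clampTime<uint16_t, 0xFFFF>(70000) << '\n';                                 /// 65535
    std::cout << clampTime<uint32_t, 0xFFFFFFFF>(static_cast<time_t>(8000000000LL)) << '\n'; /// 4294967295
    return 0;
}

In the patch the helper is applied both to plain text parsing and to the best-effort parsing paths, so an out-of-range input saturates rather than overflowing the column's storage type.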
template <> inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { time_t time = 0; readDateTimeText(time, rb, *time_zone); - if (time < 0) - time = 0; - x = time; + convertFromTime(x, time); } - template <> inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -951,7 +984,6 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb x = tmp.toUnderType(); } - template bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -1178,7 +1210,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) @@ -1193,7 +1225,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else @@ -1232,14 +1264,14 @@ struct ConvertThroughParsing { time_t res; parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) { time_t res; parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } else { diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index fb0dbdfff5c..7e26de574aa 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -90,6 +90,22 @@ public: return getDictionary(dict_name_col->getValue()); } + static const DictionaryAttribute & getDictionaryHierarchicalAttribute(const std::shared_ptr & dictionary) + { + const auto & dictionary_structure = dictionary->getStructure(); + auto hierarchical_attribute_index_optional = dictionary_structure.hierarchical_attribute_index; + + if (!dictionary->hasHierarchy() || !hierarchical_attribute_index_optional.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Dictionary {} does not support hierarchy", + dictionary->getFullName()); + + size_t hierarchical_attribute_index = *hierarchical_attribute_index_optional; + const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index]; + + return hierarchical_attribute; + } + bool isDictGetFunctionInjective(const Block & sample_columns) { /// Assume non-injective by default @@ -939,39 +955,38 @@ private: bool useDefaultImplementationForConstants() const final { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function {}. Expected String. Actual type {}", - getName(), - arguments[0]->getName()); - - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[1]->getName()); - - return std::make_shared(std::make_shared()); - } - bool isDeterministic() const override { return false; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + String dictionary_name; + if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) + dictionary_name = name_col->getValue(); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected a const string.", + arguments[0].type->getName(), + getName()); + + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + + return std::make_shared(hierarchical_attribute.type); + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + + ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type); - ColumnPtr result = dictionary->getHierarchy(arguments[1].column, std::make_shared()); return result; } @@ -1009,18 +1024,6 @@ private: getName(), arguments[0]->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", - getName(), - arguments[1]->getName()); - - if (!WhichDataType(arguments[2]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[2]->getName()); - return std::make_shared(); } @@ -1031,16 +1034,18 @@ private: if (input_rows_count == 0) return result_type->createColumn(); - auto dict = helper.getDictionary(arguments[0].column); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dict->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dict->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; + auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; - ColumnPtr res = dict->isInHierarchy(arguments[1].column, arguments[2].column, std::make_shared()); + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type); - return res; + ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type); + + return result; } mutable FunctionDictHelper helper; @@ -1069,21 +1074,18 @@ private: bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isString(arguments[0])) + if (!isString(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of first argument of function {}. Expected String. Actual type {}", getName(), - arguments[0]->getName()); + arguments[0].type->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[1]->getName()); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - return std::make_shared(std::make_shared()); + return std::make_shared(hierarchical_attribute.type); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -1092,13 +1094,12 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr result = dictionary->getDescendants(arguments[1].column, std::make_shared(), 1); + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, 1); return result; } @@ -1126,12 +1127,11 @@ private: bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const final { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 2}; } bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { size_t arguments_size = arguments.size(); if (arguments_size < 2 || arguments_size > 3) @@ -1142,27 +1142,24 @@ private: arguments_size); } - if (!isString(arguments[0])) + if (!isString(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of first argument of function {}. Expected const String. Actual type {}", getName(), - arguments[0]->getName()); + arguments[0].type->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", - getName(), - arguments[1]->getName()); - - if (arguments.size() == 3 && !isUnsignedInteger(arguments[2])) + if (arguments.size() == 3 && !isInteger(arguments[2].type)) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of third argument of function {}. Expected const unsigned integer. 
Actual type {}", getName(), - arguments[2]->getName()); + arguments[2].type->getName()); } - return std::make_shared(std::make_shared()); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + + return std::make_shared(hierarchical_attribute.type); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -1171,6 +1168,7 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); size_t level = 0; @@ -1181,17 +1179,21 @@ private: "Illegal type of third argument of function {}. Expected const unsigned integer.", getName()); - level = static_cast(arguments[2].column->get64(0)); + auto value = static_cast(arguments[2].column->getInt(0)); + if (value < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}. Expected const unsigned integer.", + getName()); + + level = static_cast(value); } - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr res = dictionary->getDescendants(arguments[1].column, std::make_shared(), level); + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level); - return res; + return result; } mutable FunctionDictHelper helper; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 0dee048dae3..c709cd22880 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -611,7 +611,7 @@ template ColumnPtr FunctionAnyArityLogical::executeImpl( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; /// Special implementation for short-circuit arguments. 
if (checkShortCircuitArguments(arguments) != -1) diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 029e33db0cf..58e6db86f75 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -1,18 +1,29 @@ #pragma once +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include +#include + #include -#include +#include + #include +#include + namespace DB { @@ -21,11 +32,38 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +ColumnPtr getOffsetsPtr(const T & column) +{ + if constexpr (std::is_same_v) + { + return column.getOffsetsPtr(); + } + else // ColumnMap + { + return column.getNestedColumn().getOffsetsPtr(); + } +} + +template +const IColumn::Offsets & getOffsets(const T & column) +{ + if constexpr (std::is_same_v) + { + return column.getOffsets(); + } + else // ColumnMap + { + return column.getNestedColumn().getOffsets(); + } +} + /** Higher-order functions for arrays. * These functions optionally apply a map (transform) to array (or multiple arrays of identical size) by lambda function, * and return some result based on that transformation. @@ -60,29 +98,42 @@ public: void getLambdaArgumentTypes(DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " needs at least one argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument, passed {}", getName(), arguments.size()); if (arguments.size() == 1) - throw Exception("Function " + getName() + " needs at least one array argument.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument with data", getName()); - DataTypes nested_types(arguments.size() - 1); - for (size_t i = 0; i < nested_types.size(); ++i) + if (arguments.size() > 2 && Impl::needOneArray()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs one argument with data", getName()); + + size_t nested_types_count = std::is_same_v ? (arguments.size() - 1) * 2 : (arguments.size() - 1); + DataTypes nested_types(nested_types_count); + for (size_t i = 0; i < arguments.size() - 1; ++i) { - const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); + const auto * array_type = checkAndGetDataType(&*arguments[i + 1]); if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. 
Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + if constexpr (std::is_same_v) + { + nested_types[2 * i] = recursiveRemoveLowCardinality(array_type->getKeyType()); + nested_types[2 * i + 1] = recursiveRemoveLowCardinality(array_type->getValueType()); + } + else if constexpr (std::is_same_v) + { + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + } } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) - throw Exception("First argument for this overload of " + getName() + " must be a function with " - + toString(nested_types.size()) + " arguments. Found " - + arguments[0]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for this overload of {} must be a function with {} arguments, found {} instead", + getName(), nested_types.size(), arguments[0]->getName()); arguments[0] = std::make_shared(nested_types); } @@ -91,37 +142,39 @@ public: { size_t min_args = Impl::needExpression() ? 2 : 1; if (arguments.size() < min_args) - throw Exception("Function " + getName() + " needs at least " - + toString(min_args) + " argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least {} argument, passed {}", + getName(), min_args, arguments.size()); - if (arguments.size() == 1) + if ((arguments.size() == 1) && std::is_same_v) { - const auto * array_type = checkAndGetDataType(arguments[0].type.get()); + const auto * data_type = checkAndGetDataType(arguments[0].type.get()); - if (!array_type) + if (!data_type) throw Exception("The only argument for function " + getName() + " must be array. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - DataTypePtr nested_type = array_type->getNestedType(); + DataTypePtr nested_type = data_type->getNestedType(); if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8()) throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return Impl::getReturnType(nested_type, nested_type); + if constexpr (std::is_same_v) + return Impl::getReturnType(nested_type, nested_type); + else + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } else { if (arguments.size() > 2 && Impl::needOneArray()) - throw Exception("Function " + getName() + " needs one array argument.", + throw Exception("Function " + getName() + " needs one argument with data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); if (!data_type_function) - throw Exception("First argument for function " + getName() + " must be a function.", + throw Exception("First argument for function " + getName() + " must be a function", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. 
@@ -131,9 +184,28 @@ public: throw Exception("Expression for function " + getName() + " must return UInt8, found " + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); + static_assert( + std::is_same_v || + std::is_same_v, + "unsupported type"); - return Impl::getReturnType(return_type, first_array_type->getNestedType()); + if (arguments.size() < 2) + { + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "{}", arguments.size()); + } + + const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); + + if (!first_array_type) + throw DB::Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type {}", arguments[1].type->getName()); + + if constexpr (std::is_same_v) + return Impl::getReturnType(return_type, first_array_type->getNestedType()); + + if constexpr (std::is_same_v) + return Impl::getReturnType(return_type, first_array_type->getKeyValueTypes()); + + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } } @@ -142,18 +214,25 @@ public: if (arguments.size() == 1) { ColumnPtr column_array_ptr = arguments[0].column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); if (!column_array) { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = column_const_array->convertToFullColumn(); - column_array = assert_cast(column_array_ptr.get()); + column_array = assert_cast(column_array_ptr.get()); } - return Impl::execute(*column_array, column_array->getDataPtr()); + if constexpr (std::is_same_v) + { + return Impl::execute(*column_array, column_array->getNestedColumn().getDataPtr()); + } + else + { + return Impl::execute(*column_array, column_array->getDataPtr()); + } } else { @@ -172,7 +251,7 @@ public: ColumnPtr offsets_column; ColumnPtr column_first_array_ptr; - const ColumnArray * column_first_array = nullptr; + const typename Impl::column_type * column_first_array = nullptr; ColumnsWithTypeAndName arrays; arrays.reserve(arguments.size() - 1); @@ -182,18 +261,18 @@ public: const auto & array_with_type_and_name = arguments[i]; ColumnPtr column_array_ptr = array_with_type_and_name.column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); const DataTypePtr & array_type_ptr = array_with_type_and_name.type; - const auto * array_type = checkAndGetDataType(array_type_ptr.get()); + const auto * array_type = checkAndGetDataType(array_type_ptr.get()); if (!column_array) { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); - column_array = checkAndGetColumn(column_array_ptr.get()); + column_array = checkAndGetColumn(column_array_ptr.get()); } if (!array_type) @@ -201,13 +280,13 @@ public: if (!offsets_column) { - offsets_column = column_array->getOffsetsPtr(); 
+ offsets_column = getOffsetsPtr(*column_array); } else { /// The first condition is optimization: do not compare data if the pointers are equal. - if (column_array->getOffsetsPtr() != offsets_column - && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) + if (getOffsetsPtr(*column_array) != offsets_column + && getOffsets(*column_array) != typeid_cast(*offsets_column).getData()) throw Exception("Arrays passed to " + getName() + " must have equal size", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); } @@ -217,13 +296,23 @@ public: column_first_array = column_array; } - arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), - array_with_type_and_name.name)); + if constexpr (std::is_same_v) + { + arrays.emplace_back(ColumnWithTypeAndName( + column_array->getNestedData().getColumnPtr(0), recursiveRemoveLowCardinality(array_type->getKeyType()), array_with_type_and_name.name+".key")); + arrays.emplace_back(ColumnWithTypeAndName( + column_array->getNestedData().getColumnPtr(1), recursiveRemoveLowCardinality(array_type->getValueType()), array_with_type_and_name.name+".value")); + } + else + { + arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), + recursiveRemoveLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); + } } /// Put all the necessary columns multiplied by the sizes of arrays into the columns. - auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets())); + auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(getOffsets(*column_first_array))); auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); replicated_column_function->appendArguments(arrays); diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index ee08c4f7f37..97a2f9c4c17 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -1,12 +1,18 @@ -#include -#include -#include -#include -#include -#include "FunctionArrayMapped.h" -#include #include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "FunctionArrayMapped.h" + namespace DB { @@ -83,6 +89,9 @@ using ArrayAggregateResult = typename ArrayAggregateResultImpl struct ArrayAggregateImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayAll.cpp b/src/Functions/array/arrayAll.cpp index 34deafdffdf..0f7ae797dc9 100644 --- a/src/Functions/array/arrayAll.cpp +++ b/src/Functions/array/arrayAll.cpp @@ -1,8 +1,8 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" namespace DB { @@ -16,6 +16,9 @@ namespace ErrorCodes */ struct ArrayAllImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCompact.cpp b/src/Functions/array/arrayCompact.cpp index c2908e37e12..8abce7288d2 100644 --- a/src/Functions/array/arrayCompact.cpp +++ b/src/Functions/array/arrayCompact.cpp @@ -1,10 +1,13 @@ -#include -#include -#include #include +#include + #include -#include + +#include 
+#include + #include +#include namespace DB @@ -16,13 +19,16 @@ namespace ErrorCodes struct ArrayCompactImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } - static DataTypePtr getReturnType(const DataTypePtr & nested_type, const DataTypePtr &) + static DataTypePtr getReturnType(const DataTypePtr & , const DataTypePtr & array_element) { - return std::make_shared(nested_type); + return std::make_shared(array_element); } template @@ -30,14 +36,16 @@ struct ArrayCompactImpl { using ColVecType = ColumnVectorOrDecimal; - const ColVecType * src_values_column = checkAndGetColumn(mapped.get()); + const ColVecType * check_values_column = checkAndGetColumn(mapped.get()); + const ColVecType * src_values_column = checkAndGetColumn(array.getData()); - if (!src_values_column) + if (!src_values_column || !check_values_column) return false; const IColumn::Offsets & src_offsets = array.getOffsets(); - const typename ColVecType::Container & src_values = src_values_column->getData(); + const auto & src_values = src_values_column->getData(); + const auto & check_values = check_values_column->getData(); typename ColVecType::MutablePtr res_values_column; if constexpr (is_decimal) res_values_column = ColVecType::create(src_values.size(), src_values_column->getScale()); @@ -45,6 +53,7 @@ struct ArrayCompactImpl res_values_column = ColVecType::create(src_values.size()); typename ColVecType::Container & res_values = res_values_column->getData(); + size_t src_offsets_size = src_offsets.size(); auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size); IColumn::Offsets & res_offsets = res_offsets_column->getData(); @@ -67,7 +76,7 @@ struct ArrayCompactImpl ++res_pos; for (; src_pos < src_offset; ++src_pos) { - if (!bitEquals(src_values[src_pos], src_values[src_pos - 1])) + if (!bitEquals(check_values[src_pos], check_values[src_pos - 1])) { res_values[res_pos] = src_values[src_pos]; ++res_pos; @@ -86,8 +95,9 @@ struct ArrayCompactImpl { const IColumn::Offsets & src_offsets = array.getOffsets(); - auto res_values_column = mapped->cloneEmpty(); - res_values_column->reserve(mapped->size()); + const auto & src_values = array.getData(); + auto res_values_column = src_values.cloneEmpty(); + res_values_column->reserve(src_values.size()); size_t src_offsets_size = src_offsets.size(); auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size); @@ -104,7 +114,7 @@ struct ArrayCompactImpl if (src_pos < src_offset) { /// Insert first element unconditionally. - res_values_column->insertFrom(*mapped, src_pos); + res_values_column->insertFrom(src_values, src_pos); /// For the rest of elements, insert if the element is different from the previous. 
++src_pos; @@ -113,7 +123,7 @@ struct ArrayCompactImpl { if (mapped->compareAt(src_pos - 1, src_pos, *mapped, 1)) { - res_values_column->insertFrom(*mapped, src_pos); + res_values_column->insertFrom(src_values, src_pos); ++res_pos; } } diff --git a/src/Functions/array/arrayCount.cpp b/src/Functions/array/arrayCount.cpp index 377a6eb8fb1..df45783323b 100644 --- a/src/Functions/array/arrayCount.cpp +++ b/src/Functions/array/arrayCount.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -16,6 +17,9 @@ namespace ErrorCodes */ struct ArrayCountImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp index 467d9ad3951..98ffa09820b 100644 --- a/src/Functions/array/arrayCumSum.cpp +++ b/src/Functions/array/arrayCumSum.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -17,6 +18,9 @@ namespace ErrorCodes struct ArrayCumSumImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp index 476bbd08163..cd8393b7a5f 100644 --- a/src/Functions/array/arrayCumSumNonNegative.cpp +++ b/src/Functions/array/arrayCumSumNonNegative.cpp @@ -1,10 +1,10 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" namespace DB { @@ -19,6 +19,9 @@ namespace ErrorCodes */ struct ArrayCumSumNonNegativeImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp index c5fdf27100b..8af0e8b04f9 100644 --- a/src/Functions/array/arrayDifference.cpp +++ b/src/Functions/array/arrayDifference.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ namespace ErrorCodes */ struct ArrayDifferenceImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayExists.cpp b/src/Functions/array/arrayExists.cpp index 34ea71af259..ea39cc0dc0b 100644 --- a/src/Functions/array/arrayExists.cpp +++ b/src/Functions/array/arrayExists.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -16,6 +17,9 @@ namespace ErrorCodes */ struct ArrayExistsImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git 
a/src/Functions/array/arrayFill.cpp b/src/Functions/array/arrayFill.cpp index d4b36a89ba5..22b9e9a657b 100644 --- a/src/Functions/array/arrayFill.cpp +++ b/src/Functions/array/arrayFill.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -19,6 +20,9 @@ namespace ErrorCodes template struct ArrayFillImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFilter.cpp b/src/Functions/array/arrayFilter.cpp index 1291989f9a2..89a9de44532 100644 --- a/src/Functions/array/arrayFilter.cpp +++ b/src/Functions/array/arrayFilter.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -15,6 +16,9 @@ namespace ErrorCodes */ struct ArrayFilterImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFirst.cpp b/src/Functions/array/arrayFirst.cpp index edbf7ef6269..693aea746f5 100644 --- a/src/Functions/array/arrayFirst.cpp +++ b/src/Functions/array/arrayFirst.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ enum class ArrayFirstLastStrategy template struct ArrayFirstLastImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFirstLastIndex.cpp b/src/Functions/array/arrayFirstLastIndex.cpp index 467678f3faa..9392cbdc840 100644 --- a/src/Functions/array/arrayFirstLastIndex.cpp +++ b/src/Functions/array/arrayFirstLastIndex.cpp @@ -1,8 +1,9 @@ #include #include -#include "FunctionArrayMapped.h" #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ enum class ArrayFirstLastIndexStrategy template struct ArrayFirstLastIndexImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayMap.cpp b/src/Functions/array/arrayMap.cpp index e3afaf7fb66..ec1973d573b 100644 --- a/src/Functions/array/arrayMap.cpp +++ b/src/Functions/array/arrayMap.cpp @@ -1,14 +1,18 @@ -#include "FunctionArrayMapped.h" #include +#include "FunctionArrayMapped.h" + namespace DB { -/** arrayMap(x1,...,xn -> expression, array1,...,arrayn) - apply the expression to each element of the array (or set of parallel arrays). +/** arrayMap(x1, ..., xn -> expression, array1, ..., arrayn) - apply the expression to each element of the array (or set of parallel arrays). */ struct ArrayMapImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + /// true if the expression (for an overload of f(expression, arrays)) or an array (for f(array)) should be boolean. static bool needBoolean() { return false; } /// true if the f(array) overload is unavailable. 
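Every Array-based Impl in this run of files gains the same pair of aliases (column_type = ColumnArray, data_type = DataTypeArray). They carry no behaviour on their own; they let the shared FunctionArrayMapped driver ask each Impl which column and data types it expects, so the Map-based Impls added later in this patch (mapFilter/mapApply) can plug into the same machinery by declaring ColumnMap/DataTypeMap instead. The user-visible semantics of the array functions themselves are unchanged, for example (standard ClickHouse behaviour, not introduced here):

    /// arrayMap(x -> x * 2, [1, 2, 3])              = [2, 4, 6]
    /// arrayMap((x, y) -> x + y, [1, 2], [10, 20])  = [11, 22]
    /// arrayFilter(x -> x != '', ['a', '', 'b'])    = ['a', 'b']
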
diff --git a/src/Functions/array/arraySort.cpp b/src/Functions/array/arraySort.cpp index 476dfb46f07..5421185211e 100644 --- a/src/Functions/array/arraySort.cpp +++ b/src/Functions/array/arraySort.cpp @@ -1,8 +1,8 @@ #include "FunctionArrayMapped.h" + #include #include - namespace DB { @@ -11,6 +11,9 @@ namespace DB template struct ArraySortImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arraySplit.cpp b/src/Functions/array/arraySplit.cpp index 2e5f2d8432e..c818be97f60 100644 --- a/src/Functions/array/arraySplit.cpp +++ b/src/Functions/array/arraySplit.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -14,6 +15,9 @@ namespace ErrorCodes template struct ArraySplitImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 17269f8dfe1..8b4a1dda197 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -379,8 +379,7 @@ private: if (!max_key_column_type->equals(*input.key_series_type)) { ColumnWithTypeAndName column_to_cast = {max_key_column, max_key_column_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), input.key_series_type); - max_key_column = std::move(casted_column); + max_key_column = castColumnAccurate(column_to_cast, input.key_series_type); } } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 95046d95176..628ac57f34d 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -99,7 +99,7 @@ public: { const ColumnWithTypeAndName & column_to_cast = arguments[0]; auto non_const_column_to_cast = column_to_cast.column->convertToFullColumnIfConst(); - ColumnWithTypeAndName column_to_cast_non_const { std::move(non_const_column_to_cast), column_to_cast.type, column_to_cast.name }; + ColumnWithTypeAndName column_to_cast_non_const { non_const_column_to_cast, column_to_cast.type, column_to_cast.name }; auto cast_result = castColumnAccurateOrNull(column_to_cast_non_const, return_type); diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 6841098ebcf..0b30f404f8e 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1027,7 +1027,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); ColumnPtr res; if ( (res = executeForConstAndNullableCondition(arguments, result_type, input_rows_count)) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 4e242c4348b..471d6fc575c 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -518,6 +518,115 @@ public: } }; +class FunctionMapUpdate : public IFunction +{ +public: + static constexpr auto name = "mapUpdate"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeMap * left = checkAndGetDataType(arguments[0].type.get()); + const DataTypeMap * right = checkAndGetDataType(arguments[1].type.get()); + + if (!left || !right) + throw Exception{"The two arguments for function " + getName() + " must be both Map type", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType())) + throw Exception{"The Key And Value type of Map for function " + getName() + " must be the same", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return std::make_shared(left->getKeyType(), left->getValueType()); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnMap * col_map_left = typeid_cast(arguments[0].column.get()); + const auto * col_const_map_left = checkAndGetColumnConst(arguments[0].column.get()); + if (col_const_map_left) + col_map_left = typeid_cast(&col_const_map_left->getDataColumn()); + if (!col_map_left) + return nullptr; + + const ColumnMap * col_map_right = typeid_cast(arguments[1].column.get()); + const auto * col_const_map_right = checkAndGetColumnConst(arguments[1].column.get()); + if (col_const_map_right) + col_map_right = typeid_cast(&col_const_map_right->getDataColumn()); + if (!col_map_right) + return nullptr; + + const auto & nested_column_left = col_map_left->getNestedColumn(); + const auto & keys_data_left = col_map_left->getNestedData().getColumn(0); + const auto & values_data_left = col_map_left->getNestedData().getColumn(1); + const auto & offsets_left = nested_column_left.getOffsets(); + + const auto & nested_column_right = col_map_right->getNestedColumn(); + const auto & keys_data_right = col_map_right->getNestedData().getColumn(0); + const auto & values_data_right = col_map_right->getNestedData().getColumn(1); + const auto & offsets_right = nested_column_right.getOffsets(); + + const auto & result_type_map = static_cast(*result_type); + const DataTypePtr & key_type = result_type_map.getKeyType(); + const DataTypePtr & value_type = result_type_map.getValueType(); + MutableColumnPtr keys_data = key_type->createColumn(); + MutableColumnPtr values_data = value_type->createColumn(); + MutableColumnPtr offsets = DataTypeNumber().createColumn(); + + IColumn::Offset current_offset = 0; + for (size_t idx = 0; idx < input_rows_count; ++idx) + { + for (size_t i = offsets_left[idx - 1]; i < offsets_left[idx]; ++i) + { + bool matched = false; + auto key = keys_data_left.getDataAt(i); + for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j) + { + if (keys_data_right.getDataAt(j).toString() == key.toString()) + { + matched = true; + break; + } + } + if (!matched) + { + keys_data->insertFrom(keys_data_left, i); + values_data->insertFrom(values_data_left, i); + ++current_offset; + } + } + for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j) + { + keys_data->insertFrom(keys_data_right, j); + 
values_data->insertFrom(values_data_right, j); + ++current_offset; + } + offsets->insert(current_offset); + } + + auto nested_column = ColumnArray::create( + ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), + std::move(offsets)); + + return ColumnMap::create(nested_column); + } +}; + } void registerFunctionsMap(FunctionFactory & factory) @@ -528,6 +637,7 @@ void registerFunctionsMap(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/mapFilter.cpp b/src/Functions/mapFilter.cpp new file mode 100644 index 00000000000..f38f8f8b4d1 --- /dev/null +++ b/src/Functions/mapFilter.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Higher-order functions for map. + * These functions optionally apply a map by lambda function, + * and return some result based on that transformation. + */ + + +/** mapFilter((k, v) -> predicate, map) - leave in the map only the kv elements for which the expression is true. + */ +struct MapFilterImpl +{ + using data_type = DataTypeMap; + using column_type = ColumnMap; + + static constexpr auto name = "mapFilter"; + + static bool needBoolean() { return true; } + static bool needExpression() { return true; } + static bool needOneArray() { return true; } + + static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypes & elems) + { + return std::make_shared(elems); + } + + /// If there are several arrays, the first one is passed here. + static ColumnPtr execute(const ColumnMap & map_column, ColumnPtr mapped) + { + const ColumnUInt8 * column_filter = typeid_cast(&*mapped); + + if (!column_filter) + { + const auto * column_filter_const = checkAndGetColumnConst(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getValue()) + return map_column.clone(); + else + { + const auto * column_array = typeid_cast(map_column.getNestedColumnPtr().get()); + const auto * column_tuple = typeid_cast(column_array->getDataPtr().get()); + ColumnPtr keys = column_tuple->getColumnPtr(0)->cloneEmpty(); + ColumnPtr values = column_tuple->getColumnPtr(1)->cloneEmpty(); + return ColumnMap::create(keys, values, ColumnArray::ColumnOffsets::create(map_column.size(), 0)); + } + } + + const IColumn::Filter & filter = column_filter->getData(); + ColumnPtr filtered = map_column.getNestedColumn().getData().filter(filter, -1); + + const IColumn::Offsets & in_offsets = map_column.getNestedColumn().getOffsets(); + auto column_offsets = ColumnArray::ColumnOffsets::create(in_offsets.size()); + IColumn::Offsets & out_offsets = column_offsets->getData(); + + size_t in_pos = 0; + size_t out_pos = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + for (; in_pos < in_offsets[i]; ++in_pos) + { + if (filter[in_pos]) + ++out_pos; + } + out_offsets[i] = out_pos; + } + + return ColumnMap::create(ColumnArray::create(filtered, std::move(column_offsets))); + } +}; + + +/** mapApply((k,v) -> expression, map) - apply the expression to the map. 
+ */ +struct MapApplyImpl +{ + using data_type = DataTypeMap; + using column_type = ColumnMap; + + static constexpr auto name = "mapApply"; + + /// true if the expression (for an overload of f(expression, maps)) or a map (for f(map)) should be boolean. + static bool needBoolean() { return false; } + static bool needExpression() { return true; } + static bool needOneArray() { return true; } + + static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypes & /*elems*/) + { + const auto * tuple_types = typeid_cast(expression_return.get()); + if (!tuple_types) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expected return type is tuple, got {}", expression_return->getName()); + if (tuple_types->getElements().size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected 2 columns as map's key and value, but found {}", tuple_types->getElements().size()); + + return std::make_shared(tuple_types->getElements()); + } + + static ColumnPtr execute(const ColumnMap & map, ColumnPtr mapped) + { + const auto * column_tuple = checkAndGetColumn(mapped.get()); + if (!column_tuple) + { + const ColumnConst * column_const_tuple = checkAndGetColumnConst(mapped.get()); + if (!column_const_tuple) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected tuple column, found {}", mapped->getName()); + auto cols = convertConstTupleToConstantElements(*column_const_tuple); + return ColumnMap::create(cols[0]->convertToFullColumnIfConst(), cols[1]->convertToFullColumnIfConst(), map.getNestedColumn().getOffsetsPtr()); + } + + return ColumnMap::create(column_tuple->getColumnPtr(0), column_tuple->getColumnPtr(1), + map.getNestedColumn().getOffsetsPtr()); + } +}; + +void registerFunctionMapApply(FunctionFactory & factory) +{ + factory.registerFunction>(); + factory.registerFunction>(); +} + +} + + diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 070a7c2f05e..7ed0ee00954 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -117,7 +117,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); /** We will gather values from columns in branches to result column, * depending on values of conditions. 
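The executeImpl edits in FunctionsLogical, if and multiIf above all replace ColumnsWithTypeAndName arguments = std::move(args); with a plain copy. Since args is a const reference, the std::move could never move: a const rvalue only binds to the copy constructor, so the old code already copied and merely looked like a move (the pattern clang-tidy's performance-move-const-arg check diagnoses). The explicit copy is still needed because executeShortCircuitArguments(arguments) mutates it. A stand-alone illustration with plain standard-library types, not ClickHouse ones:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    int main()
    {
        const std::vector<std::string> args{"cond", "then", "else"};

        /// std::move on a const lvalue yields const&&, which overload resolution can only
        /// match against the copy constructor, so this line copies despite the std::move.
        std::vector<std::string> arguments = std::move(args);

        std::cout << args.size() << ' ' << arguments.size() << '\n'; /// prints "3 3", the source is untouched
    }
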
diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index bd1038b1fc6..e7d9011db53 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -152,7 +152,7 @@ public: for (const auto & arg : arguments) arg_types.push_back(arg.type); - return std::make_unique(nowSubsecond(scale), std::move(arg_types), std::move(result_type)); + return std::make_unique(nowSubsecond(scale), std::move(arg_types), result_type); } }; diff --git a/src/Functions/nullIf.cpp b/src/Functions/nullIf.cpp index c54bbc08bcd..0b4d024c91c 100644 --- a/src/Functions/nullIf.cpp +++ b/src/Functions/nullIf.cpp @@ -61,7 +61,7 @@ public: auto func_if = FunctionFactory::instance().get("if", context)->build(if_columns); auto if_res = func_if->execute(if_columns, result_type, input_rows_count); - return makeNullable(std::move(if_res)); + return makeNullable(if_res); } }; diff --git a/src/Functions/registerFunctionsHigherOrder.cpp b/src/Functions/registerFunctionsHigherOrder.cpp index d3621a03ecd..00bea58b918 100644 --- a/src/Functions/registerFunctionsHigherOrder.cpp +++ b/src/Functions/registerFunctionsHigherOrder.cpp @@ -18,6 +18,7 @@ void registerFunctionsArraySort(FunctionFactory & factory); void registerFunctionArrayCumSum(FunctionFactory & factory); void registerFunctionArrayCumSumNonNegative(FunctionFactory & factory); void registerFunctionArrayDifference(FunctionFactory & factory); +void registerFunctionMapApply(FunctionFactory & factory); void registerFunctionsHigherOrder(FunctionFactory & factory) { @@ -36,6 +37,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArrayCumSum(factory); registerFunctionArrayCumSumNonNegative(factory); registerFunctionArrayDifference(factory); + registerFunctionMapApply(factory); } } diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index 97e5a470463..fb762271e4d 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -55,12 +55,12 @@ AIOContext::~AIOContext() io_destroy(ctx); } -AIOContext::AIOContext(AIOContext && rhs) +AIOContext::AIOContext(AIOContext && rhs) noexcept { *this = std::move(rhs); } -AIOContext & AIOContext::operator=(AIOContext && rhs) +AIOContext & AIOContext::operator=(AIOContext && rhs) noexcept { std::swap(ctx, rhs.ctx); return *this; diff --git a/src/IO/AIO.h b/src/IO/AIO.h index 5149aa2eb71..b8609c8853c 100644 --- a/src/IO/AIO.h +++ b/src/IO/AIO.h @@ -38,8 +38,8 @@ struct AIOContext : private boost::noncopyable AIOContext() {} AIOContext(unsigned int nr_events); ~AIOContext(); - AIOContext(AIOContext && rhs); - AIOContext & operator=(AIOContext && rhs); + AIOContext(AIOContext && rhs) noexcept; + AIOContext & operator=(AIOContext && rhs) noexcept; }; #elif defined(OS_FREEBSD) diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 16604da62dc..4e83234615c 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -42,12 +42,12 @@ public: } } - HandleHolder(HandleHolder && src) + HandleHolder(HandleHolder && src) noexcept { *this = std::move(src); } - HandleHolder & operator =(HandleHolder && src) + HandleHolder & operator=(HandleHolder && src) noexcept { reader = std::exchange(src.reader, nullptr); raw_handle = std::exchange(src.raw_handle, nullptr); diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index f5ecea5e5aa..79192223657 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -46,12 +46,12 @@ public: } } - HandleHolder(HandleHolder && src) + 
HandleHolder(HandleHolder && src) noexcept { *this = std::move(src); } - HandleHolder & operator =(HandleHolder && src) + HandleHolder & operator=(HandleHolder && src) noexcept { writer = std::exchange(src.writer, nullptr); raw_handle = std::exchange(src.raw_handle, nullptr); diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index eaab7560e6a..f6daec78170 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -65,7 +65,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s file_extension = path.substr(pos + 1, std::string::npos); } - std::string method_str = file_extension.empty() ? hint : std::move(file_extension); + std::string method_str; + + if (file_extension.empty()) + method_str = hint; + else + method_str = std::move(file_extension); + boost::algorithm::to_lower(method_str); if (method_str == "gzip" || method_str == "gz") diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index ebc7d04e86e..1d16f54de7b 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -126,7 +126,7 @@ ProgressValues Progress::fetchAndResetPiecewiseAtomically() return res; } -Progress & Progress::operator=(Progress && other) +Progress & Progress::operator=(Progress && other) noexcept { read_rows = other.read_rows.load(std::memory_order_relaxed); read_bytes = other.read_bytes.load(std::memory_order_relaxed); diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c00eea98ff4..77187aea8f9 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -111,9 +111,9 @@ struct Progress ProgressValues fetchAndResetPiecewiseAtomically(); - Progress & operator=(Progress && other); + Progress & operator=(Progress && other) noexcept; - Progress(Progress && other) + Progress(Progress && other) noexcept { *this = std::move(other); } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e84daa8cdbe..69942953925 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -687,6 +687,16 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return readDateTextFallback(date, buf); } +inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) +{ + if (unlikely(from < 0)) + date = 0; + else if (unlikely(from > 0xFFFF)) + date = 0xFFFF; + else + date = from; +} + template inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) { @@ -699,7 +709,8 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) else if (!readDateTextImpl(local_date, buf)) return false; - date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + ExtendedDayNum ret = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + convertToDayNum(date,ret); return ReturnType(true); } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 60c75b3c90c..eda7bb6f8ae 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -116,7 +116,14 @@ void WriteBufferFromS3::allocateBuffer() WriteBufferFromS3::~WriteBufferFromS3() { - finalize(); + try + { + finalize(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } void WriteBufferFromS3::preFinalize() @@ -386,7 +393,7 @@ void WriteBufferFromS3::waitForReadyBackGroundTasks() while (!upload_object_tasks.empty() && upload_object_tasks.front().is_finised) { auto & task = upload_object_tasks.front(); - auto exception = std::move(task.exception); + auto exception = task.exception; auto tag = std::move(task.tag); upload_object_tasks.pop_front(); @@ -413,7 +420,7 @@ void WriteBufferFromS3::waitForAllBackGroundTasks() { auto & task = upload_object_tasks.front(); if (task.exception) - std::rethrow_exception(std::move(task.exception)); + std::rethrow_exception(task.exception); part_tags.push_back(task.tag); @@ -424,7 +431,7 @@ void WriteBufferFromS3::waitForAllBackGroundTasks() { bg_tasks_condvar.wait(lock, [this]() { return put_object_task->is_finised; }); if (put_object_task->exception) - std::rethrow_exception(std::move(put_object_task->exception)); + std::rethrow_exception(put_object_task->exception); } } } diff --git a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp index e16ee03c711..d1d8ee63b8e 100644 --- a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB @@ -76,11 +76,11 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const { create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control)); if (entity->isTypeOf(AccessEntityType::USER) || entity->isTypeOf(AccessEntityType::ROLE)) - boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); + insertAtEnd(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); } ASTs result = std::move(create_queries); - boost::range::push_back(result, std::move(grant_queries)); + insertAtEnd(result, std::move(grant_queries)); return result; } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a5122cd54c7..25116f5145a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -517,7 +517,7 @@ Block ActionsDAG::updateHeader(Block header) const { auto & list = it->second; pos_to_remove.insert(pos); - node_to_column[inputs[list.front()]] = std::move(col); + node_to_column[inputs[list.front()]] = col; list.pop_front(); } } @@ -590,7 +590,7 @@ Block ActionsDAG::updateHeader(Block header) const for (auto & col : result_columns) res.insert(std::move(col)); - for (const auto & item : header) + for (auto && item : header) res.insert(std::move(item)); return res; @@ -651,8 +651,8 @@ NameSet ActionsDAG::foldActionsByProjection( { /// Projection folding. 
node->type = ActionsDAG::ActionType::INPUT; - node->result_type = std::move(column_with_type_name->type); - node->result_name = std::move(column_with_type_name->name); + node->result_type = column_with_type_name->type; + node->result_name = column_with_type_name->name; node->children.clear(); inputs.push_back(node); } @@ -724,7 +724,7 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases) Node node; node.type = ActionType::ALIAS; node.result_type = child->result_type; - node.result_name = std::move(item.second); + node.result_name = item.second; node.column = child->column; node.children.emplace_back(child); @@ -771,7 +771,7 @@ void ActionsDAG::project(const NamesWithAliases & projection) Node node; node.type = ActionType::ALIAS; node.result_type = child->result_type; - node.result_name = std::move(item.second); + node.result_name = item.second; node.column = child->column; node.children.emplace_back(child); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index bc937755618..99583c41b64 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -120,7 +120,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (i == tuple_size) for (i = 0; i < tuple_size; ++i) - columns[i]->insert(std::move(tuple_values[i])); + columns[i]->insert(tuple_values[i]); } } @@ -391,7 +391,7 @@ SetPtr makeExplicitSet( ScopeStack::Level::~Level() = default; ScopeStack::Level::Level() = default; -ScopeStack::Level::Level(Level &&) = default; +ScopeStack::Level::Level(Level &&) noexcept = default; class ScopeStack::Index { diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 1d7d64f739a..b6b67bac81c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -72,7 +72,7 @@ struct ScopeStack : WithContext NameSet inputs; Level(); - Level(Level &&); + Level(Level &&) noexcept; ~Level(); }; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 5321d5f6fd3..c60ab0f6510 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -165,9 +165,9 @@ void AsynchronousInsertQueue::scheduleDataProcessingJob(const InsertQuery & key, { /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. - pool.scheduleOrThrowOnError([=, data = std::make_shared(std::move(data))] + pool.scheduleOrThrowOnError([key, global_context, data = std::make_shared(std::move(data))]() mutable { - processData(std::move(key), std::move(*data), std::move(global_context)); + processData(key, std::move(*data), std::move(global_context)); }); } @@ -184,7 +184,10 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); - query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames()); + /// For table functions we check access while executing + /// InterpreterInsertQuery::getTable() -> ITableFunction::execute(). 
+ if (insert_query.table_id) + query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames()); String bytes; { @@ -411,7 +414,7 @@ try }; std::shared_ptr adding_defaults_transform; - if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields) + if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields && insert_query.table_id) { StoragePtr storage = DatabaseCatalog::instance().getTable(insert_query.table_id, insert_context); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 1dd2ad216aa..db3cb3049fd 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -138,10 +138,10 @@ private: static void finishWithException(const ASTPtr & query, const std::list & entries, const E & exception); public: - Queue getQueue() const + auto getQueueLocked() const { std::shared_lock lock(rwlock); - return queue; + return std::make_pair(std::ref(queue), std::move(lock)); } }; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index b8262370a0d..c87ce12c2fa 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -620,13 +620,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti new_values["Uptime"] = getContext()->getUptimeSeconds(); /// Process process memory usage according to OS -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) { MemoryStatisticsOS::Data data = memory_stat.get(); new_values["MemoryVirtual"] = data.virt; new_values["MemoryResident"] = data.resident; +#if !defined(OS_FREEBSD) new_values["MemoryShared"] = data.shared; +#endif new_values["MemoryCode"] = data.code; new_values["MemoryDataAndStack"] = data.data_and_stack; @@ -653,7 +655,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); } } +#endif +#if defined(OS_LINUX) if (loadavg) { try diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 3c7581ce1a3..e4bcb2890f3 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -76,9 +76,11 @@ private: bool first_run = true; std::chrono::system_clock::time_point previous_update_time; -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) MemoryStatisticsOS memory_stat; +#endif +#if defined(OS_LINUX) std::optional meminfo; std::optional loadavg; std::optional proc_stat; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7b63146f192..51125c2dcc6 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,10 +181,10 @@ struct ContextSharedPart mutable VolumePtr backups_volume; /// Volume for all the backups. - mutable std::optional embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. - mutable std::optional external_dictionaries_loader; - mutable std::optional external_user_defined_executable_functions_loader; - mutable std::optional external_models_loader; + mutable std::unique_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. 
+ mutable std::unique_ptr external_dictionaries_loader; + mutable std::unique_ptr external_user_defined_executable_functions_loader; + mutable std::unique_ptr external_models_loader; ExternalLoaderXMLConfigRepository * external_models_config_repository = nullptr; scope_guard models_repository_guard; @@ -216,10 +216,10 @@ struct ContextSharedPart ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. - mutable std::optional buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. - mutable std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) - mutable std::optional distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) - mutable std::optional message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) + mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. + mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) + mutable std::unique_ptr distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) + mutable std::unique_ptr message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends @@ -348,12 +348,23 @@ struct ContextSharedPart TransactionLog::shutdownIfAny(); std::unique_ptr delete_system_logs; + std::unique_ptr delete_embedded_dictionaries; + std::unique_ptr delete_external_dictionaries_loader; + std::unique_ptr delete_external_user_defined_executable_functions_loader; + std::unique_ptr delete_external_models_loader; + std::unique_ptr delete_buffer_flush_schedule_pool; + std::unique_ptr delete_schedule_pool; + std::unique_ptr delete_distributed_schedule_pool; + std::unique_ptr delete_message_broker_schedule_pool; + std::unique_ptr delete_ddl_worker; + std::unique_ptr delete_access_control; + { auto lock = std::lock_guard(mutex); - /** Compiled expressions stored in cache need to be destroyed before destruction of static objects. - * Because CHJIT instance can be static object. - */ + /** Compiled expressions stored in cache need to be destroyed before destruction of static objects. + * Because CHJIT instance can be static object. + */ #if USE_EMBEDDED_COMPILER if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) cache->reset(); @@ -373,19 +384,19 @@ struct ContextSharedPart /// but at least they can be preserved for storage termination. 
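The ContextSharedPart shutdown being rewritten here (the hunk continues just below) first moves every lazily created member into a local delete_* pointer while the context mutex is held, and only destroys them after the lock has been released. A minimal sketch of that pattern, not part of the patch; Service stands in for the loaders, schedule pools and DDL worker:

#include <memory>
#include <mutex>

struct Service
{
    ~Service()
    {
        /// May join background threads or call back into the owner;
        /// running this while the owner's mutex is held could deadlock.
    }
};

struct SharedState
{
    std::mutex mutex;
    std::unique_ptr<Service> loader;
    std::unique_ptr<Service> schedule_pool;

    void shutdown()
    {
        std::unique_ptr<Service> delete_loader;
        std::unique_ptr<Service> delete_schedule_pool;

        {
            std::lock_guard lock(mutex);
            delete_loader = std::move(loader);
            delete_schedule_pool = std::move(schedule_pool);
        }   /// the lock is released here

        /// Destruction happens without the lock, in a deterministic order.
        delete_loader.reset();
        delete_schedule_pool.reset();
    }
};

int main()
{
    SharedState state;
    state.loader = std::make_unique<Service>();
    state.schedule_pool = std::make_unique<Service>();
    state.shutdown();
}

Incidentally, the reset block added below calls delete_ddl_worker.reset() twice; the second call is a harmless no-op on an already-empty unique_ptr.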
dictionaries_xmls.reset(); user_defined_executable_functions_xmls.reset(); + models_repository_guard.reset(); delete_system_logs = std::move(system_logs); - embedded_dictionaries.reset(); - external_dictionaries_loader.reset(); - external_user_defined_executable_functions_loader.reset(); - models_repository_guard.reset(); - external_models_loader.reset(); - buffer_flush_schedule_pool.reset(); - schedule_pool.reset(); - distributed_schedule_pool.reset(); - message_broker_schedule_pool.reset(); - ddl_worker.reset(); - access_control.reset(); + delete_embedded_dictionaries = std::move(embedded_dictionaries); + delete_external_dictionaries_loader = std::move(external_dictionaries_loader); + delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader); + delete_external_models_loader = std::move(external_models_loader); + delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool); + delete_schedule_pool = std::move(schedule_pool); + delete_distributed_schedule_pool = std::move(distributed_schedule_pool); + delete_message_broker_schedule_pool = std::move(message_broker_schedule_pool); + delete_ddl_worker = std::move(ddl_worker); + delete_access_control = std::move(access_control); /// Stop trace collector if any trace_collector.reset(); @@ -395,6 +406,17 @@ struct ContextSharedPart /// Can be removed w/o context lock delete_system_logs.reset(); + delete_embedded_dictionaries.reset(); + delete_external_dictionaries_loader.reset(); + delete_external_user_defined_executable_functions_loader.reset(); + delete_external_models_loader.reset(); + delete_ddl_worker.reset(); + delete_buffer_flush_schedule_pool.reset(); + delete_schedule_pool.reset(); + delete_distributed_schedule_pool.reset(); + delete_message_broker_schedule_pool.reset(); + delete_ddl_worker.reset(); + delete_access_control.reset(); } bool hasTraceCollector() const @@ -1371,7 +1393,8 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader() ExternalDictionariesLoader & Context::getExternalDictionariesLoaderUnlocked() { if (!shared->external_dictionaries_loader) - shared->external_dictionaries_loader.emplace(getGlobalContext()); + shared->external_dictionaries_loader = + std::make_unique(getGlobalContext()); return *shared->external_dictionaries_loader; } @@ -1389,7 +1412,8 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedExecutableFunctionsLoaderUnlocked() { if (!shared->external_user_defined_executable_functions_loader) - shared->external_user_defined_executable_functions_loader.emplace(getGlobalContext()); + shared->external_user_defined_executable_functions_loader = + std::make_unique(getGlobalContext()); return *shared->external_user_defined_executable_functions_loader; } @@ -1407,7 +1431,8 @@ ExternalModelsLoader & Context::getExternalModelsLoader() ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked() { if (!shared->external_models_loader) - shared->external_models_loader.emplace(getGlobalContext()); + shared->external_models_loader = + std::make_unique(getGlobalContext()); return *shared->external_models_loader; } @@ -1442,7 +1467,7 @@ EmbeddedDictionaries & Context::getEmbeddedDictionariesImpl(const bool throw_on_ { auto geo_dictionaries_loader = std::make_unique(); - shared->embedded_dictionaries.emplace( + shared->embedded_dictionaries = std::make_unique( std::move(geo_dictionaries_loader), getGlobalContext(), 
throw_on_error); @@ -1701,7 +1726,7 @@ BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const { auto lock = getLock(); if (!shared->buffer_flush_schedule_pool) - shared->buffer_flush_schedule_pool.emplace( + shared->buffer_flush_schedule_pool = std::make_unique( settings.background_buffer_flush_schedule_pool_size, CurrentMetrics::BackgroundBufferFlushSchedulePoolTask, "BgBufSchPool"); @@ -1743,7 +1768,7 @@ BackgroundSchedulePool & Context::getSchedulePool() const { auto lock = getLock(); if (!shared->schedule_pool) - shared->schedule_pool.emplace( + shared->schedule_pool = std::make_unique( settings.background_schedule_pool_size, CurrentMetrics::BackgroundSchedulePoolTask, "BgSchPool"); @@ -1754,7 +1779,7 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const { auto lock = getLock(); if (!shared->distributed_schedule_pool) - shared->distributed_schedule_pool.emplace( + shared->distributed_schedule_pool = std::make_unique( settings.background_distributed_schedule_pool_size, CurrentMetrics::BackgroundDistributedSchedulePoolTask, "BgDistSchPool"); @@ -1765,7 +1790,7 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const { auto lock = getLock(); if (!shared->message_broker_schedule_pool) - shared->message_broker_schedule_pool.emplace( + shared->message_broker_schedule_pool = std::make_unique( settings.background_message_broker_schedule_pool_size, CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask, "BgMBSchPool"); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 64b9bf88ae9..a490d7bed43 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -259,13 +259,17 @@ bool DDLTask::tryFindHostInCluster() * */ is_circular_replicated = true; auto * query_with_table = dynamic_cast(query.get()); - if (!query_with_table || !query_with_table->database) + + /// For other DDLs like CREATE USER, there is no database name and should be executed successfully. 
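The Context getters above now lazily create std::unique_ptr members under the context lock where std::optional::emplace was used before. A small self-contained sketch of that shape, not part of the patch; ContextLike, BackgroundPool, the getter name and the pool size are made up for illustration:

#include <memory>
#include <mutex>

struct BackgroundPool
{
    explicit BackgroundPool(size_t /*size*/) {}
};

class ContextLike
{
public:
    BackgroundPool & getSchedulePool() const
    {
        std::lock_guard lock(mutex);
        if (!schedule_pool)
            schedule_pool = std::make_unique<BackgroundPool>(/*size=*/16);   /// created on first use only
        return *schedule_pool;
    }

private:
    mutable std::mutex mutex;
    mutable std::unique_ptr<BackgroundPool> schedule_pool;
};

int main()
{
    ContextLike context;
    auto & pool = context.getSchedulePool();   /// first call constructs the pool, later calls reuse it
    (void)pool;
}

Compared with std::optional, the unique_ptr form also lets the shutdown sequence above transfer ownership out with std::move and destroy the object after the lock is released.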
+ if (query_with_table) { - throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, - "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + if (!query_with_table->database) + throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, + "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + + if (default_database == query_with_table->getDatabase()) + return true; } - if (default_database == query_with_table->getDatabase()) - return true; } } found_exact_match = true; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 117119a3ee8..360a5d430e0 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -103,13 +103,13 @@ TemporaryTableHolder::TemporaryTableHolder( { } -TemporaryTableHolder::TemporaryTableHolder(TemporaryTableHolder && rhs) +TemporaryTableHolder::TemporaryTableHolder(TemporaryTableHolder && rhs) noexcept : WithContext(rhs.context), temporary_tables(rhs.temporary_tables), id(rhs.id) { rhs.id = UUIDHelpers::Nil; } -TemporaryTableHolder & TemporaryTableHolder::operator = (TemporaryTableHolder && rhs) +TemporaryTableHolder & TemporaryTableHolder::operator=(TemporaryTableHolder && rhs) noexcept { id = rhs.id; rhs.id = UUIDHelpers::Nil; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a32995658f1..c2a46277015 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -98,8 +98,8 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext const ASTPtr & query = {}, bool create_for_global_subquery = false); - TemporaryTableHolder(TemporaryTableHolder && rhs); - TemporaryTableHolder & operator = (TemporaryTableHolder && rhs); + TemporaryTableHolder(TemporaryTableHolder && rhs) noexcept; + TemporaryTableHolder & operator=(TemporaryTableHolder && rhs) noexcept; ~TemporaryTableHolder(); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 30c832e4917..83f8de78fa6 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -748,7 +748,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) if (execution_context.columns[pos].column) res.insert(execution_context.columns[pos]); - for (const auto & item : block) + for (auto && item : block) res.insert(std::move(item)); block.swap(res); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2e464053131..7e150f59694 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1367,7 +1367,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain auto child_name = child->getColumnName(); if (!aggregated_names.count(child_name)) - step.addRequiredOutput(std::move(child_name)); + step.addRequiredOutput(child_name); } return true; diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index aab3a9e7437..a75cdce820c 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -56,7 +56,7 @@ namespace static_assert(std::is_same_v); ExternalLoader::Loadables objects; objects.reserve(results.size()); - for (const auto & result : results) + for (auto && result : results) { if (auto object = std::move(result.object)) objects.push_back(std::move(object)); diff --git 
a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp index 31b4d4a621c..e3d40033cff 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -139,7 +139,7 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create UserDefinedExecutableFunctionConfiguration function_configuration { - .name = std::move(name), + .name = name, .command = std::move(command_value), .command_arguments = std::move(command_arguments), .arguments = std::move(arguments), diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 71db15dc46f..ed996430996 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -642,6 +642,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.indices = as_storage_metadata->getSecondaryIndices(); properties.projections = as_storage_metadata->getProjections().clone(); } + else + { + /// Only MergeTree support TTL + properties.columns.resetColumnTTLs(); + } properties.constraints = as_storage_metadata->getConstraints(); } @@ -1057,7 +1062,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) QualifiedTableName qualified_name{database_name, create.getTable()}; TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); if (!loading_dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(std::move(qualified_name), std::move(loading_dependencies)); + DatabaseCatalog::instance().addLoadingDependencies(qualified_name, std::move(loading_dependencies)); return fillTableIfNeeded(create); } @@ -1185,6 +1190,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, properties.columns, properties.constraints, false); + + /// If schema wes inferred while storage creation, add columns description to create query. 
+ addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); } if (from_path && !res->storesDataOnDisk()) @@ -1484,4 +1492,26 @@ void InterpreterCreateQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons } } +void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCreateQuery & create, const StoragePtr & storage) +{ + if (create.is_dictionary || (create.columns_list && create.columns_list->columns && !create.columns_list->columns->children.empty())) + return; + + auto ast_storage = std::make_shared(); + auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, + getContext()->getSettingsRef().max_parser_depth, true); + auto & create_query_from_storage = query_from_storage->as(); + + if (!create.columns_list) + { + ASTPtr columns_list = std::make_shared(*create_query_from_storage.columns_list); + create.set(create.columns_list, columns_list); + } + else + { + ASTPtr columns = std::make_shared(*create_query_from_storage.columns_list->columns); + create.columns_list->set(create.columns_list->columns, columns); + } +} + } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 5804d817fe2..b6c8e10668a 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -96,6 +96,10 @@ private: void assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const; + /// Update create query with columns description from storage if query doesn't have it. + /// It's used to prevent automatic schema inference while table creation on each server startup. + void addColumnsDescriptionToCreateQueryIfNecessary(ASTCreateQuery & create, const StoragePtr & storage); + ASTPtr query_ptr; /// Skip safety threshold when loading tables. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index b7cd095bc29..e801785b539 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -60,6 +60,18 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) { const auto & factory = TableFunctionFactory::instance(); TableFunctionPtr table_function_ptr = factory.get(query.table_function, getContext()); + + /// If table function needs structure hint from select query + /// we can create a temporary pipeline and get the header. 
+ if (query.select && table_function_ptr->needStructureHint()) + { + InterpreterSelectWithUnionQuery interpreter_select{ + query.select, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; + QueryPipelineBuilder tmp_pipeline = interpreter_select.buildQueryPipeline(); + ColumnsDescription structure_hint{tmp_pipeline.getHeader().getNamesAndTypesList()}; + table_function_ptr->setStructureHint(structure_hint); + } + return table_function_ptr->execute(query.table_function, getContext(), table_function_ptr->getName()); } @@ -185,7 +197,7 @@ Chain InterpreterInsertQuery::buildChain( std::atomic_uint64_t * elapsed_counter_ms) { auto sample = getSampleBlock(columns, table, metadata_snapshot); - return buildChainImpl(table, metadata_snapshot, std::move(sample) , thread_status, elapsed_counter_ms); + return buildChainImpl(table, metadata_snapshot, sample, thread_status, elapsed_counter_ms); } Chain InterpreterInsertQuery::buildChainImpl( @@ -285,6 +297,9 @@ BlockIO InterpreterInsertQuery::execute() auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); + + /// For table functions we check access while executing + /// getTable() -> ITableFunction::execute(). if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 2b949266c17..5ec6abb08a7 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -133,7 +133,7 @@ public: , process_list(process_list_) , processes_to_stop(std::move(processes_to_stop_)) , processes_block(std::move(processes_block_)) - , res_sample_block(std::move(res_sample_block_)) + , res_sample_block(res_sample_block_) { addTotalRowsApprox(processes_to_stop.size()); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 03303621817..318898c02b8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -565,7 +565,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Reuse already built sets for multiple passes of analysis subquery_for_sets = std::move(query_analyzer->getSubqueriesForSets()); - prepared_sets = query_info.sets.empty() ? std::move(query_analyzer->getPreparedSets()) : std::move(query_info.sets); + prepared_sets = query_info.sets.empty() ? query_analyzer->getPreparedSets() : query_info.sets; /// Do not try move conditions to PREWHERE for the second time. /// Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later. 
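Several hunks in this patch switch range-for loops from const auto & to auto && before std::move-ing the element (ExpressionActions::execute and ExternalLoader above, getHeaderForProcessingStage further down). The reason: std::move applied to a const lvalue yields a const rvalue, which overload resolution can only bind to the copy constructor, so the intended move silently copies. A standalone illustration with plain std::string, not part of the patch:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    std::vector<std::string> src{std::string(64, 'x')};
    std::vector<std::string> dst;

    for (const auto & item : src)
        dst.push_back(std::move(item));   /// item is const, so this copies
    std::cout << "after const&: src[0].size() = " << src[0].size() << '\n';   /// still 64

    dst.clear();
    for (auto && item : src)
        dst.push_back(std::move(item));   /// item is a mutable reference, so this really moves
    std::cout << "after auto&&: src[0].size() = " << src[0].size() << '\n';   /// typically 0 after the move
}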
@@ -639,8 +639,6 @@ BlockIO InterpreterSelectQuery::execute() Block InterpreterSelectQuery::getSampleBlockImpl() { - OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); - query_info.query = query_ptr; query_info.has_window = query_analyzer->hasWindow(); if (storage && !options.only_analyze) diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index ed20b1b2048..d6a00ba89b4 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -576,7 +576,7 @@ std::shared_ptr subqueryExpressionList( needed_columns[table_pos].fillExpressionList(*expression_list); for (const auto & expr : alias_pushdown[table_pos]) - expression_list->children.emplace_back(std::move(expr)); + expression_list->children.emplace_back(expr); return expression_list; } diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 40f31e4976c..36ffd617cd6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -150,6 +150,42 @@ OpenTelemetrySpanHolder::~OpenTelemetrySpanHolder() } } +void OpenTelemetrySpanHolder::addAttribute(const std::string& name, UInt64 value) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back(name); + this->attribute_values.push_back(std::to_string(value)); +} + +void OpenTelemetrySpanHolder::addAttribute(const std::string& name, const std::string& value) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back(name); + this->attribute_values.push_back(value); +} + +void OpenTelemetrySpanHolder::addAttribute(const Exception & e) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back("clickhouse.exception"); + this->attribute_values.push_back(getExceptionMessage(e, false)); +} + +void OpenTelemetrySpanHolder::addAttribute(std::exception_ptr e) +{ + if (trace_id == UUID() || e == nullptr) + return; + + this->attribute_names.push_back("clickhouse.exception"); + this->attribute_values.push_back(getExceptionMessage(e, false)); +} + bool OpenTelemetryTraceContext::parseTraceparentHeader(const std::string & traceparent, std::string & error) { diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 8dfc2eccc00..f4b3a388b54 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -45,6 +45,11 @@ public: struct OpenTelemetrySpanHolder : public OpenTelemetrySpan { OpenTelemetrySpanHolder(const std::string & _operation_name); + void addAttribute(const std::string& name, UInt64 value); + void addAttribute(const std::string& name, const std::string& value); + void addAttribute(const Exception & e); + void addAttribute(std::exception_ptr e); + ~OpenTelemetrySpanHolder(); }; diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 472efc109fb..ea87d565854 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -1,5 +1,7 @@ #include "ProfileEventsExt.h" #include +#include +#include #include #include #include @@ -36,7 +38,7 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, if (nonzero_only && 0 == value) continue; - const char * desc = ProfileEvents::getName(event); + const char * desc = getName(event); key_column.insertData(desc, strlen(desc)); value_column.insert(value); size++; @@ -45,4 +47,133 @@ void dumpToMapColumn(const 
Counters::Snapshot & counters, DB::IColumn * column, offsets.push_back(offsets.back() + size); } +/// Add records about provided non-zero ProfileEvents::Counters. +static void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + size_t rows = 0; + auto & name_column = columns[NAME_COLUMN_INDEX]; + auto & value_column = columns[VALUE_COLUMN_INDEX]; + for (Event event = 0; event < Counters::num_counters; ++event) + { + Int64 value = snapshot.counters[event]; + + if (value == 0) + continue; + + const char * desc = getName(event); + name_column->insertData(desc, strlen(desc)); + value_column->insert(value); + rows++; + } + + // Fill the rest of the columns with data + for (size_t row = 0; row < rows; ++row) + { + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(Type::INCREMENT); + } +} + +static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(Type::GAUGE); + + columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); + columns[i++]->insert(snapshot.memory_usage); +} + +void getProfileEvents( + const String & server_display_name, + DB::InternalProfileEventsQueuePtr profile_queue, + DB::Block & block, + ThreadIdToCountersSnapshot & last_sent_snapshots) +{ + using namespace DB; + static const NamesAndTypesList column_names_and_types = { + {"host_name", std::make_shared()}, + {"current_time", std::make_shared()}, + {"thread_id", std::make_shared()}, + {"type", TypeEnum}, + {"name", std::make_shared()}, + {"value", std::make_shared()}, + }; + + ColumnsWithTypeAndName temp_columns; + for (auto const & name_and_type : column_names_and_types) + temp_columns.emplace_back(name_and_type.type, name_and_type.name); + + block = std::move(temp_columns); + MutableColumns columns = block.mutateColumns(); + auto thread_group = CurrentThread::getGroup(); + auto const current_thread_id = CurrentThread::get().thread_id; + std::vector snapshots; + ThreadIdToCountersSnapshot new_snapshots; + ProfileEventsSnapshot group_snapshot; + { + auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) + { + auto const thread_id = stat.thread_id; + if (thread_id == current_thread_id) + continue; + auto current_time = time(nullptr); + auto previous_snapshot = last_sent_snapshots.find(thread_id); + auto increment = + previous_snapshot != last_sent_snapshots.end() + ? 
CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); + snapshots.push_back(ProfileEventsSnapshot{ + thread_id, + std::move(increment), + stat.memory_usage, + current_time + }); + new_snapshots[thread_id] = std::move(stat.counters); + } + + group_snapshot.thread_id = 0; + group_snapshot.current_time = time(nullptr); + group_snapshot.memory_usage = thread_group->memory_tracker.get(); + auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); + auto prev_group_snapshot = last_sent_snapshots.find(0); + group_snapshot.counters = + prev_group_snapshot != last_sent_snapshots.end() + ? CountersIncrement(group_counters, prev_group_snapshot->second) + : CountersIncrement(group_counters); + new_snapshots[0] = std::move(group_counters); + } + last_sent_snapshots = std::move(new_snapshots); + + for (auto & snapshot : snapshots) + { + dumpProfileEvents(snapshot, columns, server_display_name); + dumpMemoryTracker(snapshot, columns, server_display_name); + } + dumpProfileEvents(group_snapshot, columns, server_display_name); + dumpMemoryTracker(group_snapshot, columns, server_display_name); + + Block curr_block; + size_t rows = 0; + + for (; profile_queue->tryPop(curr_block); ++rows) + { + auto curr_columns = curr_block.getColumns(); + for (size_t j = 0; j < curr_columns.size(); ++j) + columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); + } + + bool empty = columns[0]->empty(); + if (!empty) + block.setColumns(std::move(columns)); +} + } diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 8a92eadec79..7d9fc512d15 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -7,9 +8,28 @@ namespace ProfileEvents { +constexpr size_t NAME_COLUMN_INDEX = 4; +constexpr size_t VALUE_COLUMN_INDEX = 5; + +struct ProfileEventsSnapshot +{ + UInt64 thread_id; + CountersIncrement counters; + Int64 memory_usage; + time_t current_time; +}; + +using ThreadIdToCountersSnapshot = std::unordered_map; + /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +void getProfileEvents( + const String & server_display_name, + DB::InternalProfileEventsQueuePtr profile_queue, + DB::Block & block, + ThreadIdToCountersSnapshot & last_sent_snapshots); + /// This is for ProfileEvents packets. 
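getProfileEvents above reports per-thread counter deltas: for every thread it subtracts the previously sent snapshot (if any) from the current one and then remembers the new snapshot for the next call. A simplified, self-contained sketch of that bookkeeping, not part of the patch; Counters, incrementSince and the sample numbers are illustrative stand-ins:

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

using Counters = std::vector<uint64_t>;
using ThreadIdToSnapshot = std::unordered_map<uint64_t, Counters>;

/// Delta against the last snapshot sent for this thread, or the full values on the first call.
Counters incrementSince(const Counters & current, const ThreadIdToSnapshot & previous, uint64_t thread_id)
{
    auto it = previous.find(thread_id);
    Counters delta(current.size());
    for (size_t i = 0; i < current.size(); ++i)
        delta[i] = current[i] - (it != previous.end() ? it->second[i] : 0);
    return delta;
}

int main()
{
    ThreadIdToSnapshot last_sent;

    Counters first_reading{10, 5};
    Counters first_delta = incrementSince(first_reading, last_sent, /*thread_id=*/42);   /// full values: {10, 5}
    last_sent[42] = first_reading;

    Counters second_reading{17, 5};
    Counters second_delta = incrementSince(second_reading, last_sent, 42);               /// only the increase: {7, 0}

    std::cout << second_delta[0] << ' ' << second_delta[1] << '\n';                      /// prints "7 0"
    (void)first_delta;
}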
enum Type : int8_t { diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index d9698be1a9b..a0c29c07d38 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -77,7 +77,7 @@ SessionLogElement::SessionLogElement(const UUID & auth_id_, Type type_) NamesAndTypesList SessionLogElement::getNamesAndTypes() { - const auto event_type = std::make_shared( + auto event_type = std::make_shared( DataTypeEnum8::Values { {"LoginFailure", static_cast(SESSION_LOGIN_FAILURE)}, @@ -86,7 +86,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() }); #define AUTH_TYPE_NAME_AND_VALUE(v) std::make_pair(AuthenticationTypeInfo::get(v).raw_name, static_cast(v)) - const auto identified_with_column = std::make_shared( + auto identified_with_column = std::make_shared( DataTypeEnum8::Values { AUTH_TYPE_NAME_AND_VALUE(AuthType::NO_PASSWORD), @@ -98,7 +98,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() }); #undef AUTH_TYPE_NAME_AND_VALUE - const auto interface_type_column = std::make_shared( + auto interface_type_column = std::make_shared( DataTypeEnum8::Values { {"TCP", static_cast(Interface::TCP)}, @@ -108,9 +108,9 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() {"PostgreSQL", static_cast(Interface::POSTGRESQL)} }); - const auto lc_string_datatype = std::make_shared(std::make_shared()); + auto lc_string_datatype = std::make_shared(std::make_shared()); - const auto settings_type_column = std::make_shared( + auto settings_type_column = std::make_shared( std::make_shared( DataTypes({ // setting name diff --git a/src/Interpreters/SubqueryForSet.cpp b/src/Interpreters/SubqueryForSet.cpp index 08fc07c71e1..d669e091131 100644 --- a/src/Interpreters/SubqueryForSet.cpp +++ b/src/Interpreters/SubqueryForSet.cpp @@ -7,7 +7,7 @@ namespace DB SubqueryForSet::SubqueryForSet() = default; SubqueryForSet::~SubqueryForSet() = default; -SubqueryForSet::SubqueryForSet(SubqueryForSet &&) = default; -SubqueryForSet & SubqueryForSet::operator= (SubqueryForSet &&) = default; +SubqueryForSet::SubqueryForSet(SubqueryForSet &&) noexcept = default; +SubqueryForSet & SubqueryForSet::operator= (SubqueryForSet &&) noexcept = default; } diff --git a/src/Interpreters/SubqueryForSet.h b/src/Interpreters/SubqueryForSet.h index 974f5bd3e58..f737ec4582b 100644 --- a/src/Interpreters/SubqueryForSet.h +++ b/src/Interpreters/SubqueryForSet.h @@ -17,8 +17,8 @@ struct SubqueryForSet { SubqueryForSet(); ~SubqueryForSet(); - SubqueryForSet(SubqueryForSet &&); - SubqueryForSet & operator= (SubqueryForSet &&); + SubqueryForSet(SubqueryForSet &&) noexcept; + SubqueryForSet & operator=(SubqueryForSet &&) noexcept; /// The source is obtained using the InterpreterSelectQuery subquery. 
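The OpenTelemetrySpanHolder::addAttribute overloads added earlier in this patch all begin with the same guard: when the holder carries no trace_id, the attribute is silently dropped instead of being recorded for the span log. A much-simplified standalone sketch of that shape, not part of the patch and not the real ClickHouse types:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct SpanHolder
{
    uint64_t trace_id = 0;   /// 0 stands in for the empty UUID meaning "tracing disabled"
    std::vector<std::string> attribute_names;
    std::vector<std::string> attribute_values;

    void addAttribute(const std::string & name, uint64_t value)
    {
        if (trace_id == 0)
            return;   /// no active trace: recording attributes would be wasted work
        attribute_names.push_back(name);
        attribute_values.push_back(std::to_string(value));
    }
};

int main()
{
    SpanHolder disabled;
    disabled.addAttribute("read_rows", 100);   /// dropped

    SpanHolder enabled;
    enabled.trace_id = 0xabcdef;
    enabled.addAttribute("read_rows", 100);    /// recorded

    std::cout << disabled.attribute_names.size() << ' ' << enabled.attribute_names.size() << '\n';   /// prints "0 1"
}

The real overloads additionally accept an Exception or std::exception_ptr and store its message under the "clickhouse.exception" attribute.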
std::unique_ptr source; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 7f663a2191b..f2ee9e2b84f 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -23,7 +23,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -113,9 +115,7 @@ std::shared_ptr createSystemLog( } -/// returns CREATE TABLE query, but with removed: -/// - UUID -/// - SETTINGS (for MergeTree) +/// returns CREATE TABLE query, but with removed UUID /// That way it can be used to compare with the SystemLog::getCreateTableQuery() ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) { @@ -124,11 +124,6 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) auto & old_create_query_ast = old_ast->as(); /// Reset UUID old_create_query_ast.uuid = UUIDHelpers::Nil; - /// Existing table has default settings (i.e. `index_granularity = 8192`), reset them. - if (ASTStorage * storage = old_create_query_ast.storage) - { - storage->reset(storage->settings); - } return old_ast; } @@ -481,6 +476,16 @@ ASTPtr SystemLog::getCreateTableQuery() "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); create->set(create->storage, storage_ast); + /// Write additional (default) settings for MergeTree engine to make it make it possible to compare ASTs + /// and recreate tables on settings changes. + const auto & engine = create->storage->engine->as(); + if (endsWith(engine.name, "MergeTree")) + { + auto storage_settings = std::make_unique(getContext()->getMergeTreeSettings()); + storage_settings->loadFromQuery(*create->storage); + } + + return create; } diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index 8d40dc6dfc8..6d7dee7a4c7 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -47,6 +47,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return true; } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes &) const override { diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp index 04e1e6856cc..d043fd16bb5 100644 --- a/src/Interpreters/addMissingDefaults.cpp +++ b/src/Interpreters/addMissingDefaults.cpp @@ -63,7 +63,7 @@ ActionsDAGPtr addMissingDefaults( { const auto & nested_type = array_type->getNestedType(); ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0); - const auto & constant = actions->addColumn({std::move(nested_column), nested_type, column.name}); + const auto & constant = actions->addColumn({nested_column, nested_type, column.name}); auto & group = nested_groups[offsets_name]; group[0] = &constant; @@ -76,7 +76,7 @@ ActionsDAGPtr addMissingDefaults( * it can be full (or the interpreter may decide that it is constant everywhere). 
*/ auto new_column = column.type->createColumnConstWithDefaultValue(0); - const auto * col = &actions->addColumn({std::move(new_column), column.type, column.name}); + const auto * col = &actions->addColumn({new_column, column.type, column.name}); index.push_back(&actions->materializeNode(*col)); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0f8a2a4be25..504bd64907c 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -810,8 +810,8 @@ static std::tuple executeQueryImpl( element.memory_usage = info.peak_memory_usage > 0 ? info.peak_memory_usage : 0; - element.thread_ids = std::move(info.thread_ids); - element.profile_counters = std::move(info.profile_counters); + element.thread_ids = info.thread_ids; + element.profile_counters = info.profile_counters; /// We need to refresh the access info since dependent views might have added extra information, either during /// creation of the view (PushingToViewsBlockOutputStream) or while executing its internal SELECT diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 69b7b7d833f..31913777902 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -57,7 +57,7 @@ bool removeJoin(ASTSelectQuery & select, TreeRewriterResult & rewriter_result, C const size_t left_table_pos = 0; /// Test each argument of `and` function and select ones related to only left table std::shared_ptr new_conj = makeASTFunction("and"); - for (const auto & node : collectConjunctions(where)) + for (auto && node : collectConjunctions(where)) { if (membership_collector.getIdentsMembership(node) == left_table_pos) new_conj->arguments->children.push_back(std::move(node)); diff --git a/src/Parsers/Access/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp index 314075cb7c0..da9749958ee 100644 --- a/src/Parsers/Access/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB @@ -37,7 +37,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -102,7 +102,8 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index 731564a14c7..83156c6a8e1 100644 --- a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB @@ -264,7 +264,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & std::vector> new_filters; if (parseForClauses(pos, expected, alter, new_filters)) { - boost::range::push_back(filters, std::move(new_filters)); + insertAtEnd(filters, std::move(new_filters)); continue; } diff --git a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index 8b5f2df2dd2..c58a3035dc6 100644 --- a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp +++ 
b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB @@ -39,7 +39,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -122,7 +122,8 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index cde14e632dd..da8e212fe2f 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace DB @@ -250,7 +250,7 @@ namespace if (!parseHostsWithoutPrefix(pos, expected, res_hosts)) return false; - hosts.add(std::move(res_hosts)); + hosts.add(res_hosts); return true; }); } @@ -289,7 +289,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -414,7 +414,8 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 9f7e8535a14..43e1cedd34d 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -156,7 +156,7 @@ namespace } - void eraseNonGrantable(AccessRightsElements & elements) + void throwIfNotGrantable(AccessRightsElements & elements) { boost::range::remove_erase_if(elements, [](AccessRightsElement & element) { @@ -303,7 +303,12 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (!is_revoke) - eraseNonGrantable(elements); + { + if (attach_mode) + elements.eraseNonGrantable(); + else + throwIfNotGrantable(elements); + } auto query = std::make_shared(); node = query; diff --git a/src/Parsers/Access/ParserRowPolicyName.cpp b/src/Parsers/Access/ParserRowPolicyName.cpp index 7df4e5a36dc..cf5d2ab21b6 100644 --- a/src/Parsers/Access/ParserRowPolicyName.cpp +++ b/src/Parsers/Access/ParserRowPolicyName.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB @@ -179,7 +179,7 @@ bool ParserRowPolicyNames::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; num_added_names_last_time = new_full_names.size(); - boost::range::push_back(full_names, std::move(new_full_names)); + insertAtEnd(full_names, std::move(new_full_names)); return true; }; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 198d5ce5d8d..0f091e73743 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -35,7 +35,7 @@ struct PullingAsyncPipelineExecutor::Data if (has_exception) { has_exception = false; - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } } }; diff 
--git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 6c2e62b77dc..07cdb554aba 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -90,7 +90,7 @@ struct PushingAsyncPipelineExecutor::Data if (has_exception) { has_exception = false; - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } } }; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 558ba9bdd65..cf5cfa681a1 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -114,7 +114,7 @@ static std::shared_ptr createFileReader(ReadB if (is_stopped) return nullptr; - auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(std::move(arrow_file)); + auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(arrow_file); if (!file_reader_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", file_reader_status.status().ToString()); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 102c30088c9..ecaa485c3d6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -241,7 +241,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr(chunk.Value(value_i))); // TODO: copy column } } - return {std::move(internal_column), std::move(internal_type), column_name}; + return {std::move(internal_column), internal_type, column_name}; } template @@ -337,7 +337,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates); auto nullmap_column = readByteMapFromArrowColumn(arrow_column); auto nullable_type = std::make_shared(std::move(nested_column.type)); - auto nullable_column = ColumnNullable::create(std::move(nested_column.column), std::move(nullmap_column)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); return {std::move(nullable_column), std::move(nullable_type), column_name}; } @@ -384,7 +384,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( const auto * tuple_column = assert_cast(nested_column.column.get()); const auto * tuple_type = assert_cast(nested_column.type.get()); - auto map_column = ColumnMap::create(std::move(tuple_column->getColumnPtr(0)), std::move(tuple_column->getColumnPtr(1)), std::move(offsets_column)); + auto map_column = ColumnMap::create(tuple_column->getColumnPtr(0), tuple_column->getColumnPtr(1), offsets_column); auto map_type = std::make_shared(tuple_type->getElements()[0], tuple_type->getElements()[1]); return {std::move(map_column), std::move(map_type), column_name}; } @@ -393,7 +393,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_nested_column = getNestedArrowColumn(arrow_column); auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); - auto array_column = ColumnArray::create(std::move(nested_column.column), std::move(offsets_column)); + auto array_column = ColumnArray::create(nested_column.column, 
offsets_column); auto array_type = std::make_shared(nested_column.type); return {std::move(array_column), std::move(array_type), column_name}; } @@ -458,7 +458,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_indexes_column = std::make_shared(indexes_array); auto indexes_column = readColumnWithIndexesData(arrow_indexes_column); - auto lc_column = ColumnLowCardinality::create(dict_values->column, std::move(indexes_column)); + auto lc_column = ColumnLowCardinality::create(dict_values->column, indexes_column); auto lc_type = std::make_shared(dict_values->type); return {std::move(lc_column), std::move(lc_type), column_name}; } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index bb202a3e177..6918220feb4 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -15,9 +15,9 @@ namespace ErrorCodes BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( - std::move(header), + header, in_, - std::move(params_), + params_, with_names_, with_types_, format_settings_, diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index b56a9c2729f..043e4f1e724 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -658,7 +658,7 @@ namespace DB auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable); nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); } - return arrow::struct_(std::move(nested_fields)); + return arrow::struct_(nested_fields); } if (column_type->lowCardinality()) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 58f88c5c7cf..fd33abfb587 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -169,7 +169,7 @@ static std::optional convertToDynamicValue( auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - struct_builder.set(value_field, std::move(*value)); + struct_builder.set(value_field, *value); } } else @@ -184,7 +184,7 @@ static std::optional convertToDynamicValue( = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - struct_builder.set(name, std::move(*value)); + struct_builder.set(name, *value); } } return std::nullopt; @@ -215,7 +215,7 @@ static std::optional convertToDynamicValue( auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - list_builder.set(i, std::move(*value)); + list_builder.set(i, *value); } return std::nullopt; } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp 
b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index dcab55743cb..c087749d8d8 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -27,7 +27,7 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat( : RowInputFormatWithNamesAndTypes( header_, in_, - std::move(params_), + params_, with_names_, with_types_, format_settings_, diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 61511d634d3..4950e1fb952 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -111,7 +111,7 @@ static void getFileReaderAndSchema( if (is_stopped) return; - auto result = arrow::adapters::orc::ORCFileReader::Open(std::move(arrow_file), arrow::default_memory_pool()); + auto result = arrow::adapters::orc::ORCFileReader::Open(arrow_file, arrow::default_memory_pool()); if (!result.ok()) throw Exception(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); file_reader = std::move(result).ValueOrDie(); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 72ad4616174..0247b8677af 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -457,7 +457,7 @@ static void postprocessChunk( { const auto & from_type = desc.nested_type; const auto & to_type = desc.real_type; - res_columns[desc.column_numbers[0]] = recursiveTypeConversion(std::move(column), from_type, to_type); + res_columns[desc.column_numbers[0]] = recursiveTypeConversion(column, from_type, to_type); } else res_columns[desc.column_numbers[0]] = std::move(column); diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 45c3719ebca..6b6f9d361ef 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -138,7 +138,7 @@ void addCreatingSetsStep( auto creating_set = std::make_unique( plan->getCurrentDataStream(), - std::move(description), + description, std::move(set), limits, context); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index a271ef78dfa..d948c16a78d 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -22,8 +22,8 @@ namespace ErrorCodes QueryPlan::QueryPlan() = default; QueryPlan::~QueryPlan() = default; -QueryPlan::QueryPlan(QueryPlan &&) = default; -QueryPlan & QueryPlan::operator=(QueryPlan &&) = default; +QueryPlan::QueryPlan(QueryPlan &&) noexcept = default; +QueryPlan & QueryPlan::operator=(QueryPlan &&) noexcept = default; void QueryPlan::checkInitialized() const { diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 4e342d746d1..5e064713abd 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -44,8 +44,8 @@ class QueryPlan public: QueryPlan(); ~QueryPlan(); - QueryPlan(QueryPlan &&); - QueryPlan & operator=(QueryPlan &&); + QueryPlan(QueryPlan &&) noexcept; + QueryPlan & operator=(QueryPlan &&) noexcept; void unitePlans(QueryPlanStepPtr step, std::vector plans); void addStep(QueryPlanStepPtr step); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 
9a9a71f9688..ad4d1ea86d6 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -982,7 +982,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { auto result_ptr = analyzed_result_ptr ? analyzed_result_ptr : selectRangesToRead(prepared_parts); if (std::holds_alternative(result_ptr->result)) - std::rethrow_exception(std::move(std::get(result_ptr->result))); + std::rethrow_exception(std::get(result_ptr->result)); return std::get(result_ptr->result); } @@ -1326,7 +1326,7 @@ bool MergeTreeDataSelectAnalysisResult::error() const size_t MergeTreeDataSelectAnalysisResult::marks() const { if (std::holds_alternative(result)) - std::rethrow_exception(std::move(std::get(result))); + std::rethrow_exception(std::get(result)); const auto & index_stats = std::get(result).index_stats; if (index_stats.empty()) diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 205ea6e2253..6cfdeeeeec5 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -64,7 +64,7 @@ IProcessor::Status DelayedSource::prepare() continue; } - if (!output->isNeeded()) + if (!output->canPush()) return Status::PortFull; if (input->isFinished()) diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 01cef654388..5600476fd77 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -24,7 +24,7 @@ void DistinctSortedTransform::transform(Chunk & chunk) if (column_ptrs.empty()) return; - const ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs)); + ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs)); if (data.type == ClearableSetVariants::Type::EMPTY) data.init(ClearableSetVariants::chooseMethod(column_ptrs, key_sizes)); diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index f2b29a45f84..266407f21a5 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -138,7 +138,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } } @@ -152,7 +152,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } else @@ -166,7 +166,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } else @@ -188,7 +188,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 0da7541556b..df34c592819 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -206,7 +206,7 @@ WindowTransform::WindowTransform(const Block & input_header_, { column = std::move(column)->convertToFullColumnIfConst(); } - input_header.setColumns(std::move(input_columns)); + input_header.setColumns(input_columns); 
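Several hunks stop std::move-ing a stored std::exception_ptr into std::rethrow_exception (the pulling/pushing pipeline executors earlier, ReadFromMergeTree above, RemoteQueryExecutorReadContext below). std::rethrow_exception takes the pointer by value, so the move buys nothing, and moving out of state that can be rethrown again (both getAnalysisResult and MergeTreeDataSelectAnalysisResult::marks rethrow from a stored result) typically leaves the source empty. A standalone illustration of the hazard using only standard-library types, not part of the patch:

#include <exception>
#include <iostream>
#include <stdexcept>

int main()
{
    std::exception_ptr stored;
    try
    {
        throw std::runtime_error("boom");
    }
    catch (...)
    {
        stored = std::current_exception();
    }

    /// Passing the pointer by value keeps 'stored' usable for a later rethrow.
    try
    {
        std::rethrow_exception(stored);
    }
    catch (const std::exception & e)
    {
        std::cout << "rethrown: " << e.what() << '\n';
    }

    /// Moving typically empties the source; rethrowing a null exception_ptr is undefined behaviour.
    std::exception_ptr taken = std::move(stored);
    std::cout << "stored is " << (stored ? "still set" : "now empty") << '\n';
    (void)taken;
}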
// Initialize window function workspaces. workspaces.reserve(functions.size()); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 19302afb5c9..a993b8acd7d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -695,7 +695,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() return Status::Ready; if (any_exception) - output.pushException(std::move(any_exception)); + output.pushException(any_exception); output.finish(); return Status::Finished; @@ -708,7 +708,7 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { try { - std::rethrow_exception(std::move(ptr)); + std::rethrow_exception(ptr); } catch (DB::Exception & exception) { @@ -736,7 +736,7 @@ void FinalizingViewsTransform::work() if (!any_exception) any_exception = status.exception; - view.setException(addStorageToException(std::move(status.exception), view.table_id)); + view.setException(addStorageToException(status.exception, view.table_id)); } else { diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 671ba6e4c39..84cf3829a13 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -23,7 +23,7 @@ void BlockIO::reset() /// TODO Do we need also reset callbacks? In which order? } -BlockIO & BlockIO::operator= (BlockIO && rhs) +BlockIO & BlockIO::operator= (BlockIO && rhs) noexcept { if (this == &rhs) return *this; diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index 748e46c3a1e..94c6fbc83cb 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -14,7 +14,7 @@ struct BlockIO BlockIO() = default; BlockIO(BlockIO &&) = default; - BlockIO & operator= (BlockIO && rhs); + BlockIO & operator= (BlockIO && rhs) noexcept; ~BlockIO(); BlockIO(const BlockIO &) = delete; diff --git a/src/QueryPipeline/PipelineResourcesHolder.cpp b/src/QueryPipeline/PipelineResourcesHolder.cpp index a4b85ed662b..2f6b6a9de32 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.cpp +++ b/src/QueryPipeline/PipelineResourcesHolder.cpp @@ -5,10 +5,10 @@ namespace DB { PipelineResourcesHolder::PipelineResourcesHolder() = default; -PipelineResourcesHolder::PipelineResourcesHolder(PipelineResourcesHolder &&) = default; +PipelineResourcesHolder::PipelineResourcesHolder(PipelineResourcesHolder &&) noexcept = default; PipelineResourcesHolder::~PipelineResourcesHolder() = default; -PipelineResourcesHolder & PipelineResourcesHolder::operator=(PipelineResourcesHolder && rhs) +PipelineResourcesHolder & PipelineResourcesHolder::operator=(PipelineResourcesHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); diff --git a/src/QueryPipeline/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h index 9fb1438424a..7853fa3ae4c 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.h +++ b/src/QueryPipeline/PipelineResourcesHolder.h @@ -16,10 +16,10 @@ class Context; struct PipelineResourcesHolder { PipelineResourcesHolder(); - PipelineResourcesHolder(PipelineResourcesHolder &&); + PipelineResourcesHolder(PipelineResourcesHolder &&) noexcept; ~PipelineResourcesHolder(); /// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs. 
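addStorageToException above shows the usual way to enrich a stored std::exception_ptr: rethrow it, catch the concrete exception type, attach context, and hand back a pointer, while the caller's copy of the original pointer stays valid (FinalizingViewsTransform also keeps it in any_exception). A generic standalone sketch of the same pattern using std::runtime_error in place of DB::Exception; the function name and message text are illustrative, not part of the patch:

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>

/// Rethrow the stored exception, wrap it with extra context and return a new pointer.
/// 'ptr' is taken by value, so the caller's stored pointer remains usable afterwards.
std::exception_ptr addContext(std::exception_ptr ptr, const std::string & context)
{
    try
    {
        std::rethrow_exception(ptr);
    }
    catch (const std::exception & e)
    {
        return std::make_exception_ptr(std::runtime_error(context + ": " + e.what()));
    }
    catch (...)
    {
        return ptr;   /// unknown exception type: return it unchanged
    }
}

int main()
{
    std::exception_ptr original;
    try { throw std::runtime_error("table is read-only"); }
    catch (...) { original = std::current_exception(); }

    auto annotated = addContext(original, "while pushing to a materialized view");

    try { std::rethrow_exception(annotated); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}

The real helper catches DB::Exception by non-const reference, presumably to amend the existing exception in place rather than build a new one as this sketch does.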
- PipelineResourcesHolder& operator=(PipelineResourcesHolder &&); + PipelineResourcesHolder& operator=(PipelineResourcesHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index ce1c9473f60..0412049bd58 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -24,8 +24,8 @@ namespace ErrorCodes } QueryPipeline::QueryPipeline() = default; -QueryPipeline::QueryPipeline(QueryPipeline &&) = default; -QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) = default; +QueryPipeline::QueryPipeline(QueryPipeline &&) noexcept = default; +QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) noexcept = default; QueryPipeline::~QueryPipeline() = default; static void checkInput(const InputPort & input, const ProcessorPtr & processor) diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index beb46361f95..29b5dd76017 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -32,10 +32,10 @@ class QueryPipeline { public: QueryPipeline(); - QueryPipeline(QueryPipeline &&); + QueryPipeline(QueryPipeline &&) noexcept; QueryPipeline(const QueryPipeline &) = delete; - QueryPipeline & operator=(QueryPipeline &&); + QueryPipeline & operator=(QueryPipeline &&) noexcept; QueryPipeline & operator=(const QueryPipeline &) = delete; ~QueryPipeline(); diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index 13d087f0db9..6acdf19090d 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -32,8 +32,19 @@ RemoteInserter::RemoteInserter( modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; if (CurrentThread::isInitialized()) { - modified_client_info.client_trace_context - = CurrentThread::get().thread_trace_context; + auto& thread_trace_context = CurrentThread::get().thread_trace_context; + + if (thread_trace_context.trace_id != UUID()) + { + // Overwrite the trace context only if the current thread's trace context is available. + modified_client_info.client_trace_context = thread_trace_context; + } + else + { + // If tracing is not enabled on the thread-local context (for example, when running in a background thread), + // we should not clear the trace context on the client info: it may already hold a trace context + // that should be propagated to the remote server so that the tracing of a distributed table INSERT is complete. + } } /** Send query and receive "header", that describes table structure.
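The RemoteInserter change above reduces to a guard: the trace context carried by the client info is overwritten only when the current thread actually has one, so an empty thread-local context can no longer wipe out tracing information that arrived from upstream. A minimal standalone sketch of that pattern, using simplified TraceContext and ClientInfo stand-ins rather than the real ClickHouse types:

#include <cstdint>

struct TraceContext
{
    uint64_t trace_id = 0;  // zero means "no trace attached to this thread"
    uint64_t span_id = 0;
};

struct ClientInfo
{
    // May already carry a context received from the client or an upstream server.
    TraceContext client_trace_context;
};

// Copy the thread-local context into the client info only if it is actually set;
// otherwise keep whatever the client info already holds, e.g. when the insert
// runs in a background thread with tracing disabled.
void propagateTraceContext(ClientInfo & client_info, const TraceContext & thread_trace_context)
{
    if (thread_trace_context.trace_id != 0)
        client_info.client_trace_context = thread_trace_context;
}

The deliberate part is the empty else branch: doing nothing there is what preserves the context needed to trace a distributed INSERT end to end.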
diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 4064643f1f8..575cdb95431 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -174,7 +174,7 @@ bool RemoteQueryExecutorReadContext::resumeRoutine() fiber = std::move(fiber).resume(); if (exception) - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } return true; diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index f3f366876da..aaffd08365c 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -37,7 +37,7 @@ int CertificateReloader::setCertificate(SSL * ssl) return -1; SSL_use_certificate(ssl, const_cast(current->cert.certificate())); - SSL_use_RSAPrivateKey(ssl, current->key.impl()->getRSA()); + SSL_use_PrivateKey(ssl, const_cast(static_cast(current->key))); int err = SSL_check_private_key(ssl); if (err != 1) diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 7f93b006875..88c732c2db6 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -74,7 +74,7 @@ private: struct Data { Poco::Crypto::X509Certificate cert; - Poco::Crypto::RSAKey key; + Poco::Crypto::EVPPKey key; Data(std::string cert_path, std::string key_path); }; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d0f92535844..9218c75c390 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1122,7 +1122,7 @@ std::string PredefinedQueryHandler::getQuery(HTTPServerRequest & request, HTMLFo HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix) { - const auto & query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); + auto query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); auto factory = std::make_shared>(server, std::move(query_param_name)); factory->addFiltersFromConfig(server.config(), config_prefix); diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 07964c29577..3e354cfd18f 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -544,19 +544,13 @@ std::pair KeeperTCPHandler::receiveReque void KeeperTCPHandler::packageSent() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsSent(); - } + conn_stats.incrementPacketsSent(); keeper_dispatcher->incrementPacketsSent(); } void KeeperTCPHandler::packageReceived() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsReceived(); - } + conn_stats.incrementPacketsReceived(); keeper_dispatcher->incrementPacketsReceived(); } @@ -566,10 +560,7 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.updateLatency(elapsed); - } + conn_stats.updateLatency(elapsed); operations.erase(response->xid); keeper_dispatcher->updateKeeperStatLatency(elapsed); @@ -584,15 +575,14 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response } -KeeperConnectionStats KeeperTCPHandler::getConnectionStats() const +KeeperConnectionStats & 
KeeperTCPHandler::getConnectionStats() { - std::lock_guard lock(conn_stats_mutex); return conn_stats; } void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) { - KeeperConnectionStats stats = getConnectionStats(); + auto & stats = getConnectionStats(); writeText(' ', buf); writeText(socket().peerAddress().toString(), buf); @@ -641,10 +631,7 @@ void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) void KeeperTCPHandler::resetStats() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.reset(); - } + conn_stats.reset(); last_op.set(std::make_unique(EMPTY_LAST_OP)); } diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 7953dfd2cbe..b8cccafeca5 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -51,7 +51,7 @@ public: KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_); void run() override; - KeeperConnectionStats getConnectionStats() const; + KeeperConnectionStats & getConnectionStats(); void dumpStats(WriteBufferFromOwnString & buf, bool brief); void resetStats(); @@ -100,7 +100,6 @@ private: LastOpMultiVersion last_op; - mutable std::mutex conn_stats_mutex; KeeperConnectionStats conn_stats; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 99523ff09e3..f4592a8b2c9 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -853,163 +852,15 @@ void TCPHandler::sendExtremes(const Block & extremes) } } - -namespace -{ - using namespace ProfileEvents; - - constexpr size_t NAME_COLUMN_INDEX = 4; - constexpr size_t VALUE_COLUMN_INDEX = 5; - - struct ProfileEventsSnapshot - { - UInt64 thread_id; - ProfileEvents::CountersIncrement counters; - Int64 memory_usage; - time_t current_time; - }; - - /* - * Add records about provided non-zero ProfileEvents::Counters. 
- */ - void dumpProfileEvents( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - size_t rows = 0; - auto & name_column = columns[NAME_COLUMN_INDEX]; - auto & value_column = columns[VALUE_COLUMN_INDEX]; - for (ProfileEvents::Event event = 0; event < ProfileEvents::Counters::num_counters; ++event) - { - Int64 value = snapshot.counters[event]; - - if (value == 0) - continue; - - const char * desc = ProfileEvents::getName(event); - name_column->insertData(desc, strlen(desc)); - value_column->insert(value); - rows++; - } - - // Fill the rest of the columns with data - for (size_t row = 0; row < rows; ++row) - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::INCREMENT); - } - } - - void dumpMemoryTracker( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::GAUGE); - - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); - } - } -} - - void TCPHandler::sendProfileEvents() { if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) return; - NamesAndTypesList column_names_and_types = { - { "host_name", std::make_shared() }, - { "current_time", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "type", ProfileEvents::TypeEnum }, - { "name", std::make_shared() }, - { "value", std::make_shared() }, - }; - - ColumnsWithTypeAndName temp_columns; - for (auto const & name_and_type : column_names_and_types) - temp_columns.emplace_back(name_and_type.type, name_and_type.name); - - Block block(std::move(temp_columns)); - - MutableColumns columns = block.mutateColumns(); - auto thread_group = CurrentThread::getGroup(); - auto const current_thread_id = CurrentThread::get().thread_id; - std::vector snapshots; - ThreadIdToCountersSnapshot new_snapshots; - ProfileEventsSnapshot group_snapshot; + Block block; + ProfileEvents::getProfileEvents(server_display_name, state.profile_queue, block, last_sent_snapshots); + if (block.rows() != 0) { - auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); - snapshots.reserve(stats.size()); - - for (auto & stat : stats) - { - auto const thread_id = stat.thread_id; - if (thread_id == current_thread_id) - continue; - auto current_time = time(nullptr); - auto previous_snapshot = last_sent_snapshots.find(thread_id); - auto increment = - previous_snapshot != last_sent_snapshots.end() - ? 
CountersIncrement(stat.counters, previous_snapshot->second) - : CountersIncrement(stat.counters); - snapshots.push_back(ProfileEventsSnapshot{ - thread_id, - std::move(increment), - stat.memory_usage, - current_time - }); - new_snapshots[thread_id] = std::move(stat.counters); - } - - group_snapshot.thread_id = 0; - group_snapshot.current_time = time(nullptr); - group_snapshot.memory_usage = thread_group->memory_tracker.get(); - auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); - auto prev_group_snapshot = last_sent_snapshots.find(0); - group_snapshot.counters = - prev_group_snapshot != last_sent_snapshots.end() - ? CountersIncrement(group_counters, prev_group_snapshot->second) - : CountersIncrement(group_counters); - new_snapshots[0] = std::move(group_counters); - } - last_sent_snapshots = std::move(new_snapshots); - - for (auto & snapshot : snapshots) - { - dumpProfileEvents(snapshot, columns, server_display_name); - dumpMemoryTracker(snapshot, columns, server_display_name); - } - dumpProfileEvents(group_snapshot, columns, server_display_name); - dumpMemoryTracker(group_snapshot, columns, server_display_name); - - MutableColumns logs_columns; - Block curr_block; - size_t rows = 0; - - for (; state.profile_queue->tryPop(curr_block); ++rows) - { - auto curr_columns = curr_block.getColumns(); - for (size_t j = 0; j < curr_columns.size(); ++j) - columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); - } - - bool empty = columns[0]->empty(); - if (!empty) - { - block.setColumns(std::move(columns)); - initProfileEventsBlockOutput(block); writeVarUInt(Protocol::Server::ProfileEvents, *out); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 6afda654e6a..b6ce9fa7507 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -3,9 +3,10 @@ #include #include -#include "Common/ProfileEvents.h" +#include #include #include +#include #include #include #include @@ -13,7 +14,7 @@ #include #include #include -#include +#include #include @@ -36,6 +37,8 @@ struct Settings; class ColumnsDescription; struct ProfileInfo; class TCPServer; +class NativeWriter; +class NativeReader; /// State of query processing. struct QueryState @@ -189,9 +192,7 @@ private: CurrentMetrics::Increment metric_increment{CurrentMetrics::TCPConnection}; - using ThreadIdToCountersSnapshot = std::unordered_map; - - ThreadIdToCountersSnapshot last_sent_snapshots; + ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; /// It is the name of the server that will be sent to the client. 
String server_display_name; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3733d1214b0..8ca3c44bac2 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -117,7 +117,7 @@ void ColumnDescription::readText(ReadBuffer & buf) ParserColumnDeclaration column_parser(/* require type */ true); ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - if (const auto * col_ast = ast->as()) + if (auto * col_ast = ast->as()) { if (col_ast->default_expression) { @@ -309,7 +309,7 @@ void ColumnsDescription::flattenNested() continue; } - ColumnDescription column = std::move(*it); + ColumnDescription column = *it; removeSubcolumns(column.name); it = columns.get<0>().erase(it); @@ -635,6 +635,22 @@ ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const return ret; } +void ColumnsDescription::resetColumnTTLs() +{ + std::vector old_columns; + old_columns.reserve(columns.size()); + for (const auto & col : columns) + old_columns.emplace_back(col); + + columns.clear(); + + for (auto & col : old_columns) + { + col.ttl.reset(); + add(col); + } +} + String ColumnsDescription::toString() const { diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 6cf863c6cb4..9fb03c70be9 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,6 +104,7 @@ public: using ColumnTTLs = std::unordered_map; ColumnTTLs getColumnTTLs() const; + void resetColumnTTLs(); bool has(const String & column_name) const; bool hasNested(const String & column_name) const; diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 60202e2055e..7085c6e14c8 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -198,6 +198,20 @@ ConstraintsDescription & ConstraintsDescription::operator=(const ConstraintsDesc return *this; } +ConstraintsDescription::ConstraintsDescription(ConstraintsDescription && other) noexcept + : constraints(std::move(other.constraints)) +{ + update(); +} + +ConstraintsDescription & ConstraintsDescription::operator=(ConstraintsDescription && other) noexcept +{ + constraints = std::move(other.constraints); + update(); + + return *this; +} + void ConstraintsDescription::update() { if (constraints.empty()) diff --git a/src/Storages/ConstraintsDescription.h b/src/Storages/ConstraintsDescription.h index a5095a79ccb..eb1eb95d33d 100644 --- a/src/Storages/ConstraintsDescription.h +++ b/src/Storages/ConstraintsDescription.h @@ -18,6 +18,9 @@ public: ConstraintsDescription(const ConstraintsDescription & other); ConstraintsDescription & operator=(const ConstraintsDescription & other); + ConstraintsDescription(ConstraintsDescription && other) noexcept; + ConstraintsDescription & operator=(ConstraintsDescription && other) noexcept; + bool empty() const { return constraints.empty(); } String toString() const; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index be0d2ea90db..9951fb436b5 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -331,9 +332,14 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si const Settings & settings = context->getSettingsRef(); /// Do not initiate INSERT for empty block. 
- if (shard_block.rows() == 0) + size_t rows = shard_block.rows(); + if (rows == 0) return; + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + span.addAttribute("clickhouse.shard_num", shard_info.shard_num); + span.addAttribute("clickhouse.written_rows", rows); + if (!job.is_local_job || !settings.prefer_localhost_replica) { if (!job.executor) @@ -406,13 +412,15 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si } job.blocks_written += 1; - job.rows_written += shard_block.rows(); + job.rows_written += rows; }; } void DistributedSink::writeSync(const Block & block) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + const Settings & settings = context->getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); Block block_to_send = removeSuperfluousColumns(block); @@ -456,6 +464,10 @@ void DistributedSink::writeSync(const Block & block) size_t num_shards = end - start; + span.addAttribute("clickhouse.start_shard", start); + span.addAttribute("clickhouse.end_shard", end); + span.addAttribute("db.statement", this->query_string); + if (num_shards > 1) { auto current_selector = createSelector(block); @@ -489,6 +501,7 @@ void DistributedSink::writeSync(const Block & block) catch (Exception & exception) { exception.addMessage(getCurrentStateDescription()); + span.addAttribute(exception); throw; } @@ -597,10 +610,15 @@ void DistributedSink::writeSplitAsync(const Block & block) void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) { + OpenTelemetrySpanHolder span("DistributedBlockOutputStream::writeAsyncImpl()"); + const auto & shard_info = cluster->getShardsInfo()[shard_id]; const auto & settings = context->getSettingsRef(); Block block_to_send = removeSuperfluousColumns(block); + span.addAttribute("clickhouse.shard_num", shard_info.shard_num); + span.addAttribute("clickhouse.written_rows", block.rows()); + if (shard_info.hasInternalReplication()) { if (shard_info.isLocal() && settings.prefer_localhost_replica) @@ -634,6 +652,9 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) void DistributedSink::writeToLocal(const Block & block, size_t repeats) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + span.addAttribute("db.statement", this->query_string); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); @@ -647,6 +668,8 @@ void DistributedSink::writeToLocal(const Block & block, size_t repeats) void DistributedSink::writeToShard(const Block & block, const std::vector & dir_names) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + const auto & settings = context->getSettingsRef(); const auto & distributed_settings = storage.getDistributedSettingsRef(); @@ -713,7 +736,19 @@ void DistributedSink::writeToShard(const Block & block, const std::vectorgetSettingsRef().write(header_buf); - context->getClientInfo().write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + + if (context->getClientInfo().client_trace_context.trace_id != UUID() && CurrentThread::isInitialized()) + { + // if the distributed tracing is enabled, use the trace context in current thread as parent of next span + auto client_info = context->getClientInfo(); + client_info.client_trace_context = CurrentThread::get().thread_trace_context; + client_info.write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + } + else + { + context->getClientInfo().write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + } + writeVarUInt(block.rows(), header_buf); writeVarUInt(block.bytes(), header_buf); 
writeStringBinary(block.cloneEmpty().dumpStructure(), header_buf); /// obsolete diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dac70e362ed..700b35a5a48 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -53,6 +53,7 @@ StorageFileLog::StorageFileLog( ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -61,6 +62,7 @@ StorageFileLog::StorageFileLog( , WithContext(context_->getGlobalContext()) , filelog_settings(std::move(settings)) , path(path_) + , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) @@ -94,18 +96,24 @@ StorageFileLog::StorageFileLog( void StorageFileLog::loadMetaFiles(bool attach) { - const auto & storage = getStorageID(); - /// FIXME Why do we need separate directory? Why not to use data directory? - root_meta_path - = std::filesystem::path(getContext()->getPath()) / "stream_engines/filelog/" / DatabaseCatalog::getPathForUUID(storage.uuid); - /// Attach table if (attach) { - /// Meta file may lost, log and create directory - if (!std::filesystem::exists(root_meta_path)) + const auto & storage = getStorageID(); + + auto metadata_path_exist = std::filesystem::exists(metadata_base_path); + auto previous_path = std::filesystem::path(getContext()->getPath()) / ".filelog_storage_metadata" / storage.getDatabaseName() / storage.getTableName(); + + /// For compatibility with the previous path version. + if (std::filesystem::exists(previous_path) && !metadata_path_exist) { - /// Create root_meta_path directory when store meta data + std::filesystem::copy(previous_path, metadata_base_path, std::filesystem::copy_options::recursive); + std::filesystem::remove_all(previous_path); + } + /// Meta file may lost, log and create directory + else if (!metadata_path_exist) + { + /// Create metadata_base_path directory when store meta data LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName()); } /// Load all meta info to file_infos; @@ -114,14 +122,14 @@ void StorageFileLog::loadMetaFiles(bool attach) /// Create table, just create meta data directory else { - if (std::filesystem::exists(root_meta_path)) + if (std::filesystem::exists(metadata_base_path)) { throw Exception( ErrorCodes::TABLE_METADATA_ALREADY_EXISTS, "Metadata files already exist by path: {}, remove them manually if it is intended", - root_meta_path); + metadata_base_path); } - /// We do not create the root_meta_path directory at creation time, create it at the moment of serializing + /// We do not create the metadata_base_path directory at creation time, create it at the moment of serializing /// meta files, such that can avoid unnecessarily create this directory if create table failed. 
} } @@ -212,9 +220,9 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } for (const auto & [inode, meta] : file_infos.meta_by_inode) { @@ -236,9 +244,9 @@ void StorageFileLog::serialize() const void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } auto full_name = getFullMetaPath(file_meta.file_name); if (!std::filesystem::exists(full_name)) @@ -257,11 +265,11 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const void StorageFileLog::deserialize() { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) return; /// In case of single file (not a watched directory), /// iterated directory always has one file inside. - for (const auto & dir_entry : std::filesystem::directory_iterator{root_meta_path}) + for (const auto & dir_entry : std::filesystem::directory_iterator{metadata_base_path}) { if (!dir_entry.is_regular_file()) { @@ -269,7 +277,7 @@ void StorageFileLog::deserialize() ErrorCodes::BAD_FILE_TYPE, "The file {} under {} is not a regular file when deserializing meta files", dir_entry.path().c_str(), - root_meta_path); + metadata_base_path); } ReadBufferFromFile in(dir_entry.path().c_str()); @@ -373,8 +381,8 @@ void StorageFileLog::drop() { try { - if (std::filesystem::exists(root_meta_path)) - std::filesystem::remove_all(root_meta_path); + if (std::filesystem::exists(metadata_base_path)) + std::filesystem::remove_all(metadata_base_path); } catch (...) { @@ -802,6 +810,7 @@ void registerStorageFileLog(StorageFactory & factory) args.getContext(), args.columns, path, + args.relative_data_path, format, std::move(filelog_settings), args.comment, @@ -818,6 +827,9 @@ void registerStorageFileLog(StorageFactory & factory) bool StorageFileLog::updateFileInfos() { + if (file_infos.file_names.empty()) + return false; + if (!directory_watch) { /// For table just watch one file, we can not use directory monitor to watch it diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 1f5078ab68e..98915f10a05 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -89,7 +89,7 @@ public: auto & getFileInfos() { return file_infos; } - String getFullMetaPath(const String & file_name) const { return std::filesystem::path(root_meta_path) / file_name; } + String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } NamesAndTypesList getVirtuals() const override; @@ -131,6 +131,7 @@ protected: ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -145,7 +146,7 @@ private: /// If path argument of the table is a regular file, it equals to user_files_path /// otherwise, it equals to user_files_path/ + path_argument/, e.g. 
path String root_data_path; - String root_meta_path; + String metadata_base_path; FileInfos file_infos; diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index c8ad6ffdeaf..99b5ba95d25 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -47,7 +47,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool isColumnOriented() const; + bool isColumnOriented() const override; static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index aa19ff042e2..a9d0c22d6a5 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -1,3 +1,4 @@ +#include #include #if USE_HIVE @@ -5,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,6 +17,18 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +static const unsigned max_hive_metastore_client_connections = 16; +static const int max_hive_metastore_client_retry = 3; +static const UInt64 get_hive_metastore_client_timeout = 1000000; +static const int hive_metastore_client_conn_timeout_ms = 10000; +static const int hive_metastore_client_recv_timeout_ms = 10000; +static const int hive_metastore_client_send_timeout_ms = 10000; + +ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) + : PoolBase(max_hive_metastore_client_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")), builder(builder_) +{ +} + bool HiveMetastoreClient::shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions) { @@ -40,25 +54,42 @@ bool HiveMetastoreClient::shouldUpdateTableMetadata( return false; } +void HiveMetastoreClient::tryCallHiveClient(std::function func) +{ + int i = 0; + String err_msg; + for (; i < max_hive_metastore_client_retry; ++i) + { + auto client = client_pool.get(get_hive_metastore_client_timeout); + try + { + func(client); + } + catch (apache::thrift::transport::TTransportException & e) + { + client.expire(); + err_msg = e.what(); + continue; + } + break; + } + if (i >= max_hive_metastore_client_retry) + throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", err_msg); +} + HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata(const String & db_name, const String & table_name) { LOG_TRACE(log, "Get table metadata for {}.{}", db_name, table_name); - std::lock_guard lock{mutex}; auto table = std::make_shared(); std::vector partitions; - try + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) { client->get_table(*table, db_name, table_name); - /// Query the latest partition info to check new change. 
client->get_partitions(partitions, db_name, table_name, -1); - } - catch (apache::thrift::transport::TTransportException & e) - { - setExpired(); - throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", String(e.what())); - } + }; + tryCallHiveClient(client_call); bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions); String cache_key = getCacheKey(db_name, table_name); @@ -103,23 +134,26 @@ HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata( return metadata; } +std::shared_ptr HiveMetastoreClient::getHiveTable(const String & db_name, const String & table_name) +{ + auto table = std::make_shared(); + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) + { + client->get_table(*table, db_name, table_name); + }; + tryCallHiveClient(client_call); + return table; +} + void HiveMetastoreClient::clearTableMetadata(const String & db_name, const String & table_name) { String cache_key = getCacheKey(db_name, table_name); - std::lock_guard lock{mutex}; HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); if (metadata) table_metadata_cache.remove(cache_key); } -void HiveMetastoreClient::setClient(std::shared_ptr client_) -{ - std::lock_guard lock{mutex}; - client = client_; - clearExpired(); -} - bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const { /// Parameters include keys:numRows,numFiles,rawDataSize,totalSize,transient_lastDdlTime @@ -192,53 +226,52 @@ HiveMetastoreClientFactory & HiveMetastoreClientFactory::instance() return factory; } +using namespace apache::thrift; +using namespace apache::thrift::protocol; +using namespace apache::thrift::transport; +using namespace Apache::Hadoop::Hive; + HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name, ContextPtr context) { - using namespace apache::thrift; - using namespace apache::thrift::protocol; - using namespace apache::thrift::transport; - using namespace Apache::Hadoop::Hive; std::lock_guard lock(mutex); auto it = clients.find(name); - if (it == clients.end() || it->second->isExpired()) + if (it == clients.end()) { - /// Connect to hive metastore - Poco::URI hive_metastore_url(name); - const auto & host = hive_metastore_url.getHost(); - auto port = hive_metastore_url.getPort(); - - std::shared_ptr socket = std::make_shared(host, port); - socket->setKeepAlive(true); - socket->setConnTimeout(conn_timeout_ms); - socket->setRecvTimeout(recv_timeout_ms); - socket->setSendTimeout(send_timeout_ms); - std::shared_ptr transport(new TBufferedTransport(socket)); - std::shared_ptr protocol(new TBinaryProtocol(transport)); - std::shared_ptr thrift_client = std::make_shared(protocol); - try + auto builder = [name]() { - transport->open(); - } - catch (TException & tx) - { - throw Exception("connect to hive metastore:" + name + " failed." 
+ tx.what(), ErrorCodes::BAD_ARGUMENTS); - } - - if (it == clients.end()) - { - HiveMetastoreClientPtr client = std::make_shared(std::move(thrift_client), context); - clients[name] = client; - return client; - } - else - { - it->second->setClient(std::move(thrift_client)); - return it->second; - } + return createThriftHiveMetastoreClient(name); + }; + auto client = std::make_shared(builder, context->getGlobalContext()); + clients[name] = client; + return client; } return it->second; } +std::shared_ptr HiveMetastoreClientFactory::createThriftHiveMetastoreClient(const String &name) +{ + Poco::URI hive_metastore_url(name); + const auto & host = hive_metastore_url.getHost(); + auto port = hive_metastore_url.getPort(); + + std::shared_ptr socket = std::make_shared(host, port); + socket->setKeepAlive(true); + socket->setConnTimeout(hive_metastore_client_conn_timeout_ms); + socket->setRecvTimeout(hive_metastore_client_recv_timeout_ms); + socket->setSendTimeout(hive_metastore_client_send_timeout_ms); + std::shared_ptr transport = std::make_shared(socket); + std::shared_ptr protocol = std::make_shared(transport); + std::shared_ptr thrift_client = std::make_shared(protocol); + try + { + transport->open(); + } + catch (TException & tx) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "connect to hive metastore: {} failed. {}", name, tx.what()); + } + return thrift_client; +} } #endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index e88e67b0257..b8075457a02 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -1,5 +1,6 @@ #pragma once +#include #include #if USE_HIVE @@ -10,12 +11,32 @@ #include #include +#include #include namespace DB { +using ThriftHiveMetastoreClientBuilder = std::function()>; + +class ThriftHiveMetastoreClientPool : public PoolBase +{ +public: + using Object = Apache::Hadoop::Hive::ThriftHiveMetastoreClient; + using ObjectPtr = std::shared_ptr; + using Entry = PoolBase::Entry; + explicit ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_); + +protected: + ObjectPtr allocObject() override + { + return builder(); + } + +private: + ThriftHiveMetastoreClientBuilder builder; +}; class HiveMetastoreClient : public WithContext { public: @@ -26,7 +47,9 @@ public: UInt64 last_modify_time; /// In ms size_t size; - FileInfo() = default; + explicit FileInfo() = default; + FileInfo & operator = (const FileInfo &) = default; + FileInfo(const FileInfo &) = default; FileInfo(const String & path_, UInt64 last_modify_time_, size_t size_) : path(path_), last_modify_time(last_modify_time_), size(size_) { @@ -94,17 +117,18 @@ public: using HiveTableMetadataPtr = std::shared_ptr; - explicit HiveMetastoreClient(std::shared_ptr client_, ContextPtr context_) - : WithContext(context_), client(client_), table_metadata_cache(1000) + explicit HiveMetastoreClient(ThriftHiveMetastoreClientBuilder builder_, ContextPtr context_) + : WithContext(context_) + , table_metadata_cache(1000) + , client_pool(builder_) { } + HiveTableMetadataPtr getTableMetadata(const String & db_name, const String & table_name); + // Access hive table information by hive client + std::shared_ptr getHiveTable(const String & db_name, const String & table_name); void clearTableMetadata(const String & db_name, const String & table_name); - void setClient(std::shared_ptr client_); - bool isExpired() const { return expired; } - void setExpired() { expired = true; } - void clearExpired() { expired = false; } private: static String getCacheKey(const String & 
db_name, const String & table_name) { return db_name + "." + table_name; } @@ -112,10 +136,10 @@ private: bool shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions); - std::shared_ptr client; + void tryCallHiveClient(std::function func); + LRUCache table_metadata_cache; - mutable std::mutex mutex; - std::atomic expired{false}; + ThriftHiveMetastoreClientPool client_pool; Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); }; @@ -128,13 +152,11 @@ public: HiveMetastoreClientPtr getOrCreate(const String & name, ContextPtr context); + static std::shared_ptr createThriftHiveMetastoreClient(const String & name); + private: std::mutex mutex; std::map clients; - - const int conn_timeout_ms = 10000; - const int recv_timeout_ms = 10000; - const int send_timeout_ms = 10000; }; } diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index b0cfa9809e1..dffcca61a9c 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 63cca2562eb..6d2ba29ba0f 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -7,8 +7,6 @@ #include #include -#include -#include #include #include @@ -18,6 +16,8 @@ namespace orc { class Reader; +class Statistics; +class ColumnStatistics; } namespace parquet @@ -36,6 +36,11 @@ namespace io class RandomAccessFile; } +namespace fs +{ + class FileSystem; +} + class Buffer; } diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 3040ad23283..63445c4a24c 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -286,14 +286,22 @@ StorageHive::StorageHive( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment_); setInMemoryMetadata(storage_metadata); +} + +void StorageHive::lazyInitialize() +{ + std::lock_guard lock{init_mutex}; + if (has_initialized) + return; + auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); - auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); + auto hive_table_metadata = hive_metastore_client->getHiveTable(hive_database, hive_table); - hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->getTable()->sd.location); - table_schema = hive_table_metadata->getTable()->sd.cols; + hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->sd.location); + table_schema = hive_table_metadata->sd.cols; - FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->getTable()->sd.inputFormat); + FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->sd.inputFormat); switch (hdfs_file_format) { case FileFormat::TEXT: @@ -331,6 +339,7 @@ StorageHive::StorageHive( } initMinMaxIndexExpression(); + has_initialized = true; } void StorageHive::initMinMaxIndexExpression() @@ -552,6 +561,8 @@ Pipe StorageHive::read( size_t max_block_size, unsigned num_streams) { + lazyInitialize(); + HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, context_->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 
9629629e057..40787a409e8 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -36,7 +36,7 @@ public: ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const override { - return false; + return true; } @@ -94,6 +94,9 @@ private: String hive_database; String hive_table; + std::mutex init_mutex; + bool has_initialized = false; + /// Hive table meta std::vector table_schema; Names text_input_field_names; /// Defines schema of hive file, only used when text input format is TEXT @@ -116,6 +119,8 @@ private: std::shared_ptr storage_settings; Poco::Logger * log = &Poco::Logger::get("StorageHive"); + + void lazyInitialize(); }; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index fd4679c4b3d..186cde263bb 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -567,6 +567,8 @@ public: /// Returns true if all disks of storage are read-only. virtual bool isStaticStorage() const; + virtual bool isColumnOriented() const { return false; } + /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. /// Used for: /// - Simple count() optimization diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2aa1327b54b..e28c065def7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3848,7 +3848,7 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac Strings part_names = backup->listFiles(data_path_in_backup); for (const String & part_name : part_names) { - const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); + auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); if (!part_info) continue; @@ -5012,7 +5012,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg query_options, /* prepared_sets_= */ query_info.sets); const auto & analysis_result = select.getAnalysisResult(); - query_info.sets = std::move(select.getQueryAnalyzer()->getPreparedSets()); + query_info.sets = select.getQueryAnalyzer()->getPreparedSets(); bool can_use_aggregate_projection = true; /// If the first stage of the query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage, diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 1df97dc9241..737e89979a6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -231,8 +231,10 @@ void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_s void MergeTreeDataPartChecksums::add(MergeTreeDataPartChecksums && rhs_checksums) { - for (auto & checksum : rhs_checksums.files) - files[std::move(checksum.first)] = std::move(checksum.second); + for (auto && checksum : rhs_checksums.files) + { + files[checksum.first] = std::move(checksum.second); + } rhs_checksums.files.clear(); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ea7a4808cbe..0dfced95cfa 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -824,7 +824,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd { auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()}); 
if (inserted) - it->second.condition = index_helper->createIndexMergedCondtition(query_info, metadata_snapshot); + it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); it->second.addIndex(index_helper); } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index f87584c9cd6..5ecb7b537e2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -710,9 +710,14 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) const auto & array_type = assert_cast(*index_data_type); data_type = WhichDataType(array_type.getNestedType()); } + else if (data_type.isLowCarnality()) + { + const auto & low_cardinality = assert_cast(*index_data_type); + data_type = WhichDataType(low_cardinality.getDictionaryType()); + } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Bloom filter index can be used only with `String`, `FixedString` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception("Bloom filter index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); } if (index.type == NgramTokenExtractor::getName()) diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 30995a162dc..6658730b7c1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -84,7 +84,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR); } -MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCondtition( +MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const { return std::make_shared( diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index bbdf70a052c..43b56d9559f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -70,7 +70,7 @@ public: MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const override; - MergeTreeIndexMergedConditionPtr createIndexMergedCondtition( + MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1e001d01ada..a761fc3124e 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -164,7 +164,7 @@ struct IMergeTreeIndex virtual MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, ContextPtr context) const = 0; - virtual MergeTreeIndexMergedConditionPtr createIndexMergedCondtition( + virtual MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & /*query_info*/, StorageMetadataPtr /*storage_metadata*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git 
a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index e22f662960c..d8dba458203 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -707,7 +707,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar { auto old_columns = block_before_prewhere.getColumns(); filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(std::move(old_columns)); + block_before_prewhere.setColumns(old_columns); } for (auto & column : block_before_prewhere) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 726b2141ffb..2e17611cd93 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -26,14 +26,14 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) : IMergeTreeReader( - std::move(data_part_), - std::move(columns_), + data_part_, + columns_, metadata_snapshot_, uncompressed_cache_, mark_cache_, - std::move(mark_ranges_), - std::move(settings_), - std::move(avg_value_size_hints_)) + mark_ranges_, + settings_, + avg_value_size_hints_) , marks_loader( data_part->volume->getDisk(), mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 8a69183e858..9599e3ee82c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -20,9 +20,15 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( const StorageMetadataPtr & metadata_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) - : IMergeTreeReader(data_part_, std::move(columns_), metadata_snapshot_, - nullptr, nullptr, std::move(mark_ranges_), - std::move(settings_), {}) + : IMergeTreeReader( + data_part_, + columns_, + metadata_snapshot_, + nullptr, + nullptr, + mark_ranges_, + settings_, + {}) , part_in_memory(std::move(data_part_)) { for (const auto & name_and_type : columns) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 99a36a8e08a..7d7975e0bc0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -36,14 +36,14 @@ MergeTreeReaderWide::MergeTreeReaderWide( const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) : IMergeTreeReader( - std::move(data_part_), - std::move(columns_), + data_part_, + columns_, metadata_snapshot_, uncompressed_cache_, - std::move(mark_cache_), - std::move(mark_ranges_), - std::move(settings_), - std::move(avg_value_size_hints_)) + mark_cache_, + mark_ranges_, + settings_, + avg_value_size_hints_) { try { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 1222365b01d..7066d7bb9cd 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -114,8 +114,8 @@ MergedBlockOutputStream::Finalizer::~Finalizer() } } -MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) = default; -MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) = default; +MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default; 
+MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} void MergedBlockOutputStream::finalizePart( diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 0a4f9d23927..3f325524167 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -43,8 +43,8 @@ public: explicit Finalizer(std::unique_ptr impl_); ~Finalizer(); - Finalizer(Finalizer &&); - Finalizer & operator=(Finalizer &&); + Finalizer(Finalizer &&) noexcept; + Finalizer & operator=(Finalizer &&) noexcept; void finish(); }; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index cce9fdba7cd..fda7aabaa0e 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -99,6 +99,7 @@ bool MutatePlainMergeTreeTask::executeStep() LOG_ERROR(&Poco::Logger::get("MutatePlainMergeTreeTask"), "{}", exception_message); storage.updateMutationEntriesErrors(future_part, false, exception_message); write_part_log(ExecutionStatus::fromCurrentException()); + tryLogCurrentException(__PRETTY_FUNCTION__); return false; } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index f37cb3bffef..41284b5b5a1 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -22,6 +22,9 @@ struct StorageInfo StorageInfo(StoragePtr storage_, const PostgreSQLTableStructure::Attributes & attributes_) : storage(storage_), attributes(attributes_) {} + + StorageInfo(StoragePtr storage_, PostgreSQLTableStructure::Attributes && attributes_) + : storage(storage_), attributes(std::move(attributes_)) {} }; using StorageInfos = std::unordered_map; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3dbb5b18de9..bd525ca9e5a 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -240,7 +240,7 @@ public: WriteBufferFromString wb(serialized_keys[rows_processed]); key_column_type->getDefaultSerialization()->serializeBinary(*it, wb); wb.finalize(); - slices_keys[rows_processed] = std::move(serialized_keys[rows_processed]); + slices_keys[rows_processed] = serialized_keys[rows_processed]; ++it; ++rows_processed; diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index da8c5f115b2..e6d856b80fc 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -295,7 +295,7 @@ void StorageDictionary::alter(const AlterCommands & params, ContextPtr alter_con } std::lock_guard lock(dictionary_config_mutex); - configuration->setString("dictionary.comment", std::move(new_comment)); + configuration->setString("dictionary.comment", new_comment); } void registerStorageDictionary(StorageFactory & factory) diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed5431d5e03..bc2bd3bc933 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -12,9 +12,6 @@ namespace DB { -class StorageFileBlockInputStream; -class StorageFileBlockOutputStream; - class StorageFile final : public shared_ptr_helper, public IStorage { 
friend struct shared_ptr_helper; @@ -67,7 +64,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool isColumnOriented() const; + bool isColumnOriented() const override; bool supportsPartitionBy() const override { return true; } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 19e8f78d877..8934fd0ccbf 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -172,7 +172,7 @@ ColumnPtr fillColumnWithRandomData( auto data_column = fillColumnWithRandomData(nested_type, offset, max_array_length, max_string_length, rng, context); - return ColumnArray::create(std::move(data_column), std::move(offsets_column)); + return ColumnArray::create(data_column, std::move(offsets_column)); } case TypeIndex::Tuple: @@ -198,7 +198,7 @@ ColumnPtr fillColumnWithRandomData( for (UInt64 i = 0; i < limit; ++i) null_map[i] = rng() % 16 == 0; /// No real motivation for this. - return ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + return ColumnNullable::create(nested_column, std::move(null_map_column)); } case TypeIndex::UInt8: @@ -395,7 +395,7 @@ protected: for (const auto & elem : block_to_fill) columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context)); - columns = Nested::flatten(block_to_fill.cloneWithColumns(std::move(columns))).getColumns(); + columns = Nested::flatten(block_to_fill.cloneWithColumns(columns)).getColumns(); return {std::move(columns), block_size}; } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 9d6935b609b..bdaed8b2624 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -55,6 +55,9 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); + StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; + StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; + /// NOTE: Thread unsafe part. You should modify same StorageInMemoryMetadata /// structure from different threads. It should be used as MultiVersion /// object. See example in IStorage. 
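Several hunks above (BlockIO, QueryPipeline, PipelineResourcesHolder, ConstraintsDescription, MergedBlockOutputStream::Finalizer, and the defaulted moves added to StorageInMemoryMetadata) mark or keep move operations noexcept. The practical payoff is that std::vector and friends consult std::move_if_noexcept during reallocation: a copyable type whose move constructor may throw gets copied, a noexcept one gets moved. A small self-contained illustration, independent of the ClickHouse classes:

#include <iostream>
#include <type_traits>
#include <vector>

struct Plain
{
    Plain() = default;
    Plain(const Plain &) = default;
    Plain(Plain &&) {}             // user-provided move, not marked noexcept
};

struct Marked
{
    Marked() = default;
    Marked(const Marked &) = default;
    Marked(Marked &&) noexcept {}  // same operation, marked noexcept
};

int main()
{
    std::cout << std::is_nothrow_move_constructible_v<Plain> << '\n';   // prints 0
    std::cout << std::is_nothrow_move_constructible_v<Marked> << '\n';  // prints 1

    // On reallocation the vector moves Marked elements; a vector<Plain>
    // would have to copy them to keep the strong exception guarantee.
    std::vector<Marked> v(1);
    v.reserve(v.capacity() + 1);
}

Defaulted special members, as in StorageInMemoryMetadata above, pick up noexcept automatically when every member's move is noexcept, which is why simply declaring them = default is often enough.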
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 5ba1514877a..d5d1f312bec 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -171,7 +171,7 @@ Chunk LogSource::generate() } if (!column->empty()) - res.insert(ColumnWithTypeAndName(std::move(column), name_type.type, name_type.name)); + res.insert(ColumnWithTypeAndName(column, name_type.type, name_type.name)); } if (res) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 433fdb5b0b5..15e499c6e6c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -730,7 +730,7 @@ void StorageMerge::convertingSourceStream( for (const auto & alias : aliases) { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - ASTPtr expr = std::move(alias.expression); + ASTPtr expr = alias.expression; auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b555ca5ee01..e425caa0532 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1157,7 +1157,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign } bool scheduled = false; - if (time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred( + if (auto lock = time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred( getSettings()->merge_tree_clear_old_temporary_directories_interval_seconds)) { assignee.scheduleCommonTask(ExecutableLambdaAdapter::create( diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 83cf2b07b21..10b6c7ca447 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -178,7 +178,7 @@ public: { /// Avoid Excessive copy when block is small enough if (block.rows() <= max_rows) - return Blocks{std::move(block)}; + return {block}; const size_t split_block_size = ceil(block.rows() * 1.0 / max_rows); Blocks split_blocks(split_block_size); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ac462e5cb6e..c685a542d13 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -615,6 +615,11 @@ std::shared_ptr StorageS3::createFileIterator( } } +bool StorageS3::isColumnOriented() const +{ + return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); +} + Pipe StorageS3::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -639,6 +644,20 @@ Pipe StorageS3::read( std::shared_ptr iterator_wrapper = createFileIterator(client_auth, keys, is_key_with_globs, distributed_processing, local_context); + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( @@ -646,10 +665,10 @@ Pipe StorageS3::read( need_file_column, format_name, getName(), - metadata_snapshot->getSampleBlock(), + block_for_format, local_context, format_settings, - 
metadata_snapshot->getColumns(), + columns_description, max_block_size, max_single_read_retries, compression_method, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 03b54706b4a..cecf7f50860 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -218,6 +218,8 @@ private: bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx); + + bool isColumnOriented() const override; }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e62b14224ec..768f01efd24 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -405,7 +405,7 @@ std::vector> IStorageURLBase::getReadURIPara std::function IStorageURLBase::getReadPOSTDataCallback( const Names & /*column_names*/, - const StorageMetadataPtr & /*metadata_snapshot*/, + const ColumnsDescription & /* columns_description */, const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, @@ -482,6 +482,11 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from urls failed. Errors:\n{}", exception_messages); } +bool IStorageURLBase::isColumnOriented() const +{ + return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); +} + Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -493,6 +498,20 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + if (urlWithGlobs(uri)) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; @@ -515,14 +534,14 @@ Pipe IStorageURLBase::read( uri_info, getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params, /* glob_url */true)); @@ -537,14 +556,14 @@ Pipe IStorageURLBase::read( uri_info, getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params)); @@ -561,6 +580,20 @@ Pipe StorageURLWithFailover::read( size_t max_block_size, unsigned /*num_streams*/) { + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) 
+ { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); auto uri_info = std::make_shared(); @@ -569,14 +602,14 @@ Pipe StorageURLWithFailover::read( uri_info, getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params)); @@ -649,7 +682,7 @@ StorageURLWithFailover::StorageURLWithFailover( Poco::URI poco_uri(uri_option); context_->getRemoteHostFilter().checkURL(poco_uri); LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); - uri_options.emplace_back(std::move(uri_option)); + uri_options.emplace_back(uri_option); } } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 790f01135d3..79d2489f241 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -88,12 +88,14 @@ protected: virtual std::function getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; + bool isColumnOriented() const override; + private: virtual Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const = 0; }; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 90ac04ed250..3cb6c9d0359 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -68,14 +68,14 @@ std::vector> StorageXDBC::getReadURIParams( std::function StorageXDBC::getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { String query = transformQueryForExternalDatabase(query_info, - metadata_snapshot->getColumns().getOrdinary(), + columns_description.getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), remote_database_name, remote_table_name, @@ -85,7 +85,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( NamesAndTypesList cols; for (const String & name : column_names) { - auto column_data = metadata_snapshot->getColumns().getPhysical(name); + auto column_data = columns_description.getPhysical(name); cols.emplace_back(column_data.name, column_data.type); } @@ -114,7 +114,7 @@ Pipe StorageXDBC::read( return IStorageURLBase::read(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); } -SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, 
ContextPtr local_context) +SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) { bridge_helper->startBridgeSync(); @@ -140,6 +140,11 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetad chooseCompressionMethod(uri, compression_method)); } +bool StorageXDBC::isColumnOriented() const +{ + return true; +} + Block StorageXDBC::getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const { return metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 4438e1c4737..d8771c4ed83 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -59,13 +59,15 @@ private: std::function getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const override; + + bool isColumnOriented() const override; }; } diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 133761cbe22..efc4c0ed37b 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -39,7 +39,13 @@ if(Git_FOUND) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) endif() -configure_file (StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) +function(generate_system_build_options) + include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) + include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) + include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) + configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) +endfunction() +generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 0fa6c1b653c..80fc070c83a 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -41,10 +41,10 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co if (!insert_queue) return; - auto queue = insert_queue->getQueue(); + auto [queue, queue_lock] = insert_queue->getQueueLocked(); for (const auto & [key, elem] : queue) { - std::lock_guard lock(elem->mutex); + std::lock_guard elem_lock(elem->mutex); if (!elem->data) continue; @@ -62,8 +62,19 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co size_t i = 0; res_columns[i++]->insert(queryToString(insert_query)); - res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); - res_columns[i++]->insert(insert_query.table_id.getTableName()); + + /// If query is "INSERT INTO FUNCTION" then table_id is empty. 
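The StorageS3, StorageURL and StorageXDBC changes above follow one pattern: if isColumnOriented() reports true (via FormatFactory::checkIfFormatIsColumnOriented in S3/URL, unconditionally for XDBC), the read header and ColumnsDescription are built only from the requested physical columns, so the format never materializes columns the query does not need; otherwise the full sample block is used, since row formats such as TSV must parse everything anyway. A rough standalone sketch of that selection, with toy types, where the hard-coded format list is only a stand-in for the real FormatFactory check:

    #include <iostream>
    #include <string>
    #include <vector>

    using Block = std::vector<std::string>;   // toy: a header is just column names here

    // Stand-in for FormatFactory::checkIfFormatIsColumnOriented (assumed format list).
    bool isColumnOriented(const std::string & format)
    {
        return format == "Parquet" || format == "ORC" || format == "Arrow";
    }

    Block headerForRead(const Block & all_columns, const Block & requested, const std::string & format)
    {
        // Column-oriented formats can skip unknown columns, so ask only for what was requested;
        // row-oriented formats get the full header because they have to parse every column.
        return isColumnOriented(format) ? requested : all_columns;
    }

    int main()
    {
        Block all = {"id", "score", "day"};
        Block requested = {"day"};
        for (const auto & name : headerForRead(all, requested, "Parquet"))
            std::cout << name << '\n';   // prints only "day"
    }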
+ if (insert_query.table_id) + { + res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); + res_columns[i++]->insert(insert_query.table_id.getTableName()); + } + else + { + res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); + } + res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); res_columns[i++]->insert(time_in_microseconds(elem->data->last_update)); diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 5c25322b4f0..d7034cf828b 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -11,7 +11,6 @@ const char * auto_config_build[] "VERSION_DATE", "@VERSION_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", - "LIBRARY_ARCHITECTURE", "@CMAKE_LIBRARY_ARCHITECTURE@", "CMAKE_VERSION", "@CMAKE_VERSION@", "C_COMPILER", "@CMAKE_C_COMPILER@", "C_COMPILER_VERSION", "@CMAKE_C_COMPILER_VERSION@", @@ -19,7 +18,7 @@ const char * auto_config_build[] "CXX_COMPILER_VERSION", "@CMAKE_CXX_COMPILER_VERSION@", "C_FLAGS", "@FULL_C_FLAGS_NORMALIZED@", "CXX_FLAGS", "@FULL_CXX_FLAGS_NORMALIZED@", - "LINK_FLAGS", "@CMAKE_EXE_LINKER_FLAGS_NORMALIZED@", + "LINK_FLAGS", "@FULL_EXE_LINKER_FLAGS_NORMALIZED@", "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", "STATIC", "@USE_STATIC_LIBRARIES@", "SPLIT_BINARY", "@CLICKHOUSE_SPLIT_BINARY@", diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 455d715d5da..cd4f3dab109 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace DB @@ -43,7 +43,8 @@ NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() {"apply_to_except", std::make_shared(std::make_shared())} }; - boost::range::push_back(names_and_types, std::move(extra_names_and_types)); + insertAtEnd(names_and_types, extra_names_and_types); + return names_and_types; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 24e3fe4f7a9..9332bc6a004 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -509,8 +509,8 @@ protected: loading_dependencies_tables.reserve(info.dependencies.size()); for (auto && dependency : info.dependencies) { - loading_dependencies_databases.push_back(std::move(dependency.database)); - loading_dependencies_tables.push_back(std::move(dependency.table)); + loading_dependencies_databases.push_back(dependency.database); + loading_dependencies_tables.push_back(dependency.table); } Array loading_dependent_databases; @@ -519,8 +519,8 @@ protected: loading_dependent_tables.reserve(info.dependencies.size()); for (auto && dependent : info.dependent_database_objects) { - loading_dependent_databases.push_back(std::move(dependent.database)); - loading_dependent_tables.push_back(std::move(dependent.table)); + loading_dependent_databases.push_back(dependent.database); + loading_dependent_tables.push_back(dependent.table); } if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index f2b2102c7ff..879951df162 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ 
b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include namespace DB @@ -47,14 +49,23 @@ NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() }; } -using Paths = Strings; +/// Type of path to be fetched +enum class ZkPathType +{ + Exact, /// Fetch all nodes under this path + Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) + Recurse, /// Fatch all nodes under this path, recursively +}; + +/// List of paths to be feched from zookeeper +using Paths = std::deque>; static String pathCorrected(const String & path) { String path_corrected; /// path should starts with '/', otherwise ZBADARGUMENTS will be thrown in /// ZooKeeper::sendThread and the session will fail. - if (path[0] != '/') + if (path.empty() || path[0] != '/') path_corrected = '/'; path_corrected += path; /// In all cases except the root, path must not end with a slash. @@ -64,7 +75,7 @@ static String pathCorrected(const String & path) } -static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) +static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context, bool allow_unrestricted) { const auto * function = elem.as(); if (!function) @@ -73,7 +84,7 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (function->name == "and") { for (const auto & child : function->arguments->children) - if (extractPathImpl(*child, res, context)) + if (extractPathImpl(*child, res, context, allow_unrestricted)) return true; return false; @@ -110,7 +121,7 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) set.checkColumnsNumber(1); const auto & set_column = *set.getSetElements()[0]; for (size_t row = 0; row < set_column.size(); ++row) - res.emplace_back(set_column[row].safeGet()); + res.emplace_back(set_column[row].safeGet(), ZkPathType::Exact); } else { @@ -121,12 +132,12 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (String str; literal->value.tryGet(str)) { - res.emplace_back(str); + res.emplace_back(str, ZkPathType::Exact); } else if (Tuple tuple; literal->value.tryGet(tuple)) { for (auto element : tuple) - res.emplace_back(element.safeGet()); + res.emplace_back(element.safeGet(), ZkPathType::Exact); } else return false; @@ -156,7 +167,61 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (literal->value.getType() != Field::Types::String) return false; - res.emplace_back(literal->value.safeGet()); + res.emplace_back(literal->value.safeGet(), ZkPathType::Exact); + return true; + } + else if (allow_unrestricted && function->name == "like") + { + const ASTIdentifier * ident; + ASTPtr value; + if ((ident = args.children.at(0)->as())) + value = args.children.at(1); + else if ((ident = args.children.at(1)->as())) + value = args.children.at(0); + else + return false; + + if (ident->name() != "path") + return false; + + auto evaluated = evaluateConstantExpressionAsLiteral(value, context); + const auto * literal = evaluated->as(); + if (!literal) + return false; + + if (literal->value.getType() != Field::Types::String) + return false; + + String pattern = literal->value.safeGet(); + bool has_metasymbol = false; + String prefix; // pattern prefix before the first metasymbol occurrence + for (size_t i = 0; i < pattern.size(); i++) + { + char c = pattern[i]; + // Handle escaping of metasymbols + if (c == '\\' && i + 1 < pattern.size()) + { + char c2 = pattern[i + 1]; + 
if (c2 == '_' || c2 == '%') + { + prefix.append(1, c2); + i++; // to skip two bytes + continue; + } + } + + // Stop prefix on the first metasymbols occurrence + if (c == '_' || c == '%') + { + has_metasymbol = true; + break; + } + + prefix.append(1, c); + } + + res.emplace_back(prefix, has_metasymbol ? ZkPathType::Prefix : ZkPathType::Exact); + return true; } @@ -166,39 +231,60 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) /** Retrieve from the query a condition of the form `path = 'path'`, from conjunctions in the WHERE clause. */ -static Paths extractPath(const ASTPtr & query, ContextPtr context) +static Paths extractPath(const ASTPtr & query, ContextPtr context, bool allow_unrestricted) { const auto & select = query->as(); if (!select.where()) - return Paths(); + return allow_unrestricted ? Paths{{"/", ZkPathType::Recurse}} : Paths(); Paths res; - return extractPathImpl(*select.where(), res, context) ? res : Paths(); + return extractPathImpl(*select.where(), res, context, allow_unrestricted) ? res : Paths(); } void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const { - const Paths & paths = extractPath(query_info.query, context); - if (paths.empty()) - throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + Paths paths = extractPath(query_info.query, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); - std::unordered_set paths_corrected; - for (const auto & path : paths) - { - const String & path_corrected = pathCorrected(path); - auto [it, inserted] = paths_corrected.emplace(path_corrected); - if (!inserted) /// Do not repeat processing. - continue; + if (paths.empty()) + throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.", ErrorCodes::BAD_ARGUMENTS); - zkutil::Strings nodes = zookeeper->getChildren(path_corrected); + std::unordered_set added; + while (!paths.empty()) + { + auto [path, path_type] = std::move(paths.front()); + paths.pop_front(); + + String prefix; + if (path_type == ZkPathType::Prefix) + { + prefix = path; + size_t last_slash = prefix.rfind('/'); + path = prefix.substr(0, last_slash == String::npos ? 0 : last_slash); + } + + String path_corrected = pathCorrected(path); + + /// Node can be deleted concurrently. It's Ok, we don't provide any + /// consistency guarantees for system.zookeeper table. + zkutil::Strings nodes; + zookeeper->tryGetChildren(path_corrected, nodes); String path_part = path_corrected; if (path_part == "/") path_part.clear(); + if (!prefix.empty()) + { + // Remove nodes that do not match specified prefix + nodes.erase(std::remove_if(nodes.begin(), nodes.end(), [&prefix, &path_part] (const String & node) + { + return (path_part + '/' + node).substr(0, prefix.size()) != prefix; + }), nodes.end()); + } + std::vector> futures; futures.reserve(nodes.size()); for (const String & node : nodes) @@ -210,6 +296,11 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c if (res.error == Coordination::Error::ZNONODE) continue; /// Node was deleted meanwhile. 
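The LIKE support added to system.zookeeper above turns a pattern into its longest fixed prefix, stopping at the first unescaped metasymbol and honoring backslash escapes, and only that prefix is used to narrow the ZooKeeper listing before the recursive walk. The same loop, extracted into a standalone helper purely for illustration (likePrefix is a hypothetical name):

    #include <iostream>
    #include <string>
    #include <utility>

    // Returns {prefix, had_metasymbol}: the literal prefix of a LIKE pattern up to the
    // first unescaped '%' or '_'; a backslash escapes a following metasymbol.
    std::pair<std::string, bool> likePrefix(const std::string & pattern)
    {
        std::string prefix;
        for (size_t i = 0; i < pattern.size(); ++i)
        {
            char c = pattern[i];
            if (c == '\\' && i + 1 < pattern.size() && (pattern[i + 1] == '_' || pattern[i + 1] == '%'))
            {
                prefix += pattern[i + 1];
                ++i;                        // skip the escaped metasymbol
                continue;
            }
            if (c == '_' || c == '%')
                return {prefix, true};      // the rest of the pattern is not a fixed prefix
            prefix += c;
        }
        return {prefix, false};             // no metasymbols: the pattern names an exact path
    }

    int main()
    {
        auto [prefix, wildcard] = likePrefix("/clickhouse/tables/%/replicas");
        std::cout << prefix << ' ' << wildcard << '\n';   // "/clickhouse/tables/" 1
    }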
+ // Deduplication + String key = path_part + '/' + nodes[i]; + if (auto [it, inserted] = added.emplace(key); !inserted) + continue; + const Coordination::Stat & stat = res.stat; size_t col_num = 0; @@ -228,6 +319,11 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c res_columns[col_num++]->insert(stat.pzxid); res_columns[col_num++]->insert( path); /// This is the original path. In order to process the request, condition in WHERE should be triggered. + + if (path_type != ZkPathType::Exact && res.stat.numChildren > 0) + { + paths.emplace_back(key, ZkPathType::Recurse); + } } } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 37c913f58a9..a329b01e9f2 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -639,10 +639,43 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( "The first argument of time window function should not be a constant value.", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + ToIdentifierMatcher::Data query_data; + query_data.window_id_name = window_id_name; + query_data.window_id_alias = window_id_alias; + ToIdentifierMatcher::Visitor to_identifier_visitor(query_data); + + ReplaceFunctionNowData time_now_data; + ReplaceFunctionNowVisitor time_now_visitor(time_now_data); + ReplaceFunctionWindowMatcher::Data func_hop_data; + ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data); + + DropTableIdentifierMatcher::Data drop_table_identifier_data; + DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); + + auto visit = [&](const IAST * ast) + { + auto node = ast->clone(); + QueryNormalizer(normalizer_data).visit(node); + /// now() -> ____timestamp + if (is_time_column_func_now) + { + time_now_visitor.visit(node); + function_now_timezone = time_now_data.now_timezone; + } + drop_table_identifier_visitor.visit(node); + /// tumble/hop -> windowID + func_window_visitor.visit(node); + to_identifier_visitor.visit(node); + node->setAlias(""); + return node; + }; + auto new_storage = std::make_shared(); /// storage != nullptr in case create window view with ENGINE syntax if (storage) { + new_storage->set(new_storage->engine, storage->engine->clone()); + if (storage->ttl_table) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, @@ -654,46 +687,14 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( "The ENGINE of WindowView must be MergeTree family of table engines " "including the engines with replication support"); - ToIdentifierMatcher::Data query_data; - query_data.window_id_name = window_id_name; - query_data.window_id_alias = window_id_alias; - ToIdentifierMatcher::Visitor to_identifier_visitor(query_data); - - ReplaceFunctionNowData time_now_data; - ReplaceFunctionNowVisitor time_now_visitor(time_now_data); - ReplaceFunctionWindowMatcher::Data func_hop_data; - ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data); - - DropTableIdentifierMatcher::Data drop_table_identifier_data; - DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); - - new_storage->set(new_storage->engine, storage->engine->clone()); - - auto visit = [&](const IAST * ast, IAST *& field) - { - if (ast) - { - auto node = ast->clone(); - QueryNormalizer(normalizer_data).visit(node); - /// now() -> ____timestamp - if (is_time_column_func_now) - { - time_now_visitor.visit(node); - function_now_timezone 
= time_now_data.now_timezone; - } - drop_table_identifier_visitor.visit(node); - /// tumble/hop -> windowID - func_window_visitor.visit(node); - to_identifier_visitor.visit(node); - node->setAlias(""); - new_storage->set(field, node); - } - }; - - visit(storage->partition_by, new_storage->partition_by); - visit(storage->primary_key, new_storage->primary_key); - visit(storage->order_by, new_storage->order_by); - visit(storage->sample_by, new_storage->sample_by); + if (storage->partition_by) + new_storage->set(new_storage->partition_by, visit(storage->partition_by)); + if (storage->primary_key) + new_storage->set(new_storage->primary_key, visit(storage->primary_key)); + if (storage->order_by) + new_storage->set(new_storage->order_by, visit(storage->order_by)); + if (storage->sample_by) + new_storage->set(new_storage->sample_by, visit(storage->sample_by)); if (storage->settings) new_storage->set(new_storage->settings, storage->settings->clone()); @@ -702,8 +703,21 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( { new_storage->set(new_storage->engine, makeASTFunction("AggregatingMergeTree")); - new_storage->set(new_storage->order_by, std::make_shared(window_id_column_name)); - new_storage->set(new_storage->primary_key, std::make_shared(window_id_column_name)); + if (inner_select_query->groupBy()->children.size() == 1) //GROUP BY windowID + { + auto node = visit(inner_select_query->groupBy()->children[0].get()); + new_storage->set(new_storage->order_by, std::make_shared(node->getColumnName())); + } + else + { + auto group_by_function = makeASTFunction("tuple"); + for (auto & child : inner_select_query->groupBy()->children) + { + auto node = visit(child.get()); + group_by_function->arguments->children.push_back(std::make_shared(node->getColumnName())); + } + new_storage->set(new_storage->order_by, group_by_function); + } } auto new_columns = std::make_shared(); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index 576d1ea23ff..c9948a4b131 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -1,9 +1,17 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_table_functions .) 
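The StorageWindowView change above collapses four near-identical transform-and-set branches into a single visit lambda that clones the AST, runs the rewriting visitors, clears the alias and returns the result, which is then assigned per storage clause and reused for the GROUP BY keys. A toy sketch of the shape of that refactor, where Node and Storage are stand-ins for ASTPtr and ASTStorage and the real lambda also runs QueryNormalizer plus the window/identifier visitors:

    #include <iostream>
    #include <memory>
    #include <string>

    struct Node { std::string text; std::string alias; };
    using NodePtr = std::shared_ptr<Node>;

    struct Storage { NodePtr partition_by, primary_key, order_by, sample_by; };

    int main()
    {
        Storage storage;
        storage.partition_by = std::make_shared<Node>(Node{"toDate(ts)", "d"});
        storage.order_by = std::make_shared<Node>(Node{"windowID(ts, toIntervalSecond(5))", "w"});

        // The transformation is written once and returns the rewritten clone,
        // so every clause becomes "if (present) new_storage.X = visit(X)".
        auto visit = [](const NodePtr & ast)
        {
            auto node = std::make_shared<Node>(*ast);   // clone()
            // ... rewriting visitors would run here ...
            node->alias.clear();                        // setAlias("")
            return node;
        };

        Storage new_storage;
        if (storage.partition_by) new_storage.partition_by = visit(storage.partition_by);
        if (storage.primary_key)  new_storage.primary_key  = visit(storage.primary_key);
        if (storage.order_by)     new_storage.order_by     = visit(storage.order_by);
        if (storage.sample_by)    new_storage.sample_by    = visit(storage.sample_by);

        std::cout << new_storage.order_by->text << '\n';
    }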
+if (TARGET ch_contrib::hivemetastore) + add_headers_and_sources(clickhouse_table_functions Hive) +endif () list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp) list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) -target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) +if (TARGET ch_contrib::hivemetastore) + target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms ch_contrib::hivemetastore ch_contrib::hdfs) +else () + target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) +endif () + diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp new file mode 100644 index 00000000000..e7de55181c3 --- /dev/null +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -0,0 +1,91 @@ +#include +#if USE_HIVE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + } + + void TableFunctionHive::parseArguments(const ASTPtr & ast_function_, ContextPtr context_) + { + ASTs & args_func = ast_function_->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); + + ASTs & args = args_func.at(0)->children; + + const auto message = fmt::format( + "The signature of function {} is:\n" + " - hive_url, hive_database, hive_table, structure, partition_by_keys", + getName()); + + if (args.size() != 5) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, message); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_); + + hive_metastore_url = args[0]->as().value.safeGet(); + hive_database = args[1]->as().value.safeGet(); + hive_table = args[2]->as().value.safeGet(); + table_structure = args[3]->as().value.safeGet(); + partition_by_def = args[4]->as().value.safeGet(); + + actual_columns = parseColumnsListFromString(table_structure, context_); + } + + ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/) const { return actual_columns; } + + StoragePtr TableFunctionHive::executeImpl( + const ASTPtr & /*ast_function_*/, + ContextPtr context_, + const std::string & table_name_, + ColumnsDescription /*cached_columns_*/) const + { + const Settings & settings = context_->getSettings(); + ParserLambdaExpression partition_by_parser; + ASTPtr partition_by_ast = parseQuery( + partition_by_parser, + "(" + partition_by_def + ")", + "partition by declaration list", + settings.max_query_size, + settings.max_parser_depth); + StoragePtr storage; + storage = StorageHive::create( + hive_metastore_url, + hive_database, + hive_table, + StorageID(getDatabaseName(), table_name_), + actual_columns, + ConstraintsDescription{}, + "", + partition_by_ast, + std::make_unique(), + context_); + + return storage; + } + + + void registerTableFunctionHive(TableFunctionFactory & factory_) { factory_.registerFunction(); } + +} +#endif diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h new file mode 100644 index 00000000000..0973bdda329 --- /dev/null +++ 
b/src/TableFunctions/Hive/TableFunctionHive.h @@ -0,0 +1,38 @@ +#pragma once +#include +#if USE_HIVE +#include +#include +namespace DB +{ +class Context; +class TableFunctionHive : public ITableFunction +{ +public: + static constexpr auto name = "hive"; + static constexpr auto storage_type_name = "hive"; + std::string getName() const override { return name; } + + bool hasStaticStructure() const override { return true; } + + StoragePtr executeImpl( + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + + const char * getStorageTypeName() const override { return storage_type_name; } + ColumnsDescription getActualTableStructure(ContextPtr) const override; + void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; + +private: + Poco::Logger * logger = &Poco::Logger::get("TableFunctionHive"); + + String cluster_name; + String hive_metastore_url; + String hive_database; + String hive_table; + String table_structure; + String partition_by_def; + + ColumnsDescription actual_columns; +}; +} +#endif diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 93cf5057e88..9c8d694865b 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -52,6 +52,16 @@ public: /// Returns actual table structure probably requested from remote server, may fail virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/) const = 0; + /// Check if table function needs a structure hint from SELECT query in case of + /// INSERT INTO FUNCTION ... SELECT ... + /// It's used for schema inference. + virtual bool needStructureHint() const { return false; } + + /// Set a structure hint from SELECT query in case of + /// INSERT INTO FUNCTION ... SELECT ... + /// This hint could be used not to repeat schema in function arguments. + virtual void setStructureHint(const ColumnsDescription &) {} + /// Create storage according to the query. 
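The needStructureHint/setStructureHint pair added above lets INSERT INTO FUNCTION ... SELECT hand the SELECT header to a table function whose structure argument is 'auto', so the schema does not have to be repeated in the function call. A simplified sketch of that contract with toy types; the caller in main is hypothetical, in ClickHouse the interpreter supplies the hint:

    #include <iostream>
    #include <string>

    using ColumnsDescription = std::string;   // toy stand-in

    // A table function may accept the SELECT header as a structure hint
    // when the user wrote 'auto' instead of an explicit structure.
    class ITableFunctionLike
    {
    public:
        virtual ~ITableFunctionLike() = default;
        virtual bool needStructureHint() const { return false; }
        virtual void setStructureHint(const ColumnsDescription &) {}
    };

    class FileLikeFunction : public ITableFunctionLike
    {
    public:
        bool needStructureHint() const override { return structure == "auto"; }
        void setStructureHint(const ColumnsDescription & hint) override { structure_hint = hint; }

        ColumnsDescription resolveColumns() const
        {
            if (structure != "auto")
                return structure;        // explicit structure wins
            if (!structure_hint.empty())
                return structure_hint;   // otherwise use the SELECT header
            return {};                   // otherwise schema inference has to run
        }

    private:
        std::string structure = "auto";
        ColumnsDescription structure_hint;
    };

    int main()
    {
        FileLikeFunction func;
        if (func.needStructureHint())
            func.setStructureHint("x UInt64");   // what INSERT INTO FUNCTION ... SELECT would pass
        std::cout << func.resolveColumns() << '\n';
    }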
StoragePtr execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false) const; diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 5328abd1654..3388a7ec9f6 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -95,6 +95,9 @@ StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ColumnsDescription columns; if (structure != "auto") columns = parseColumnsListFromString(structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + StoragePtr storage = getStorage(filename, format, columns, context, table_name, compression_method); storage->startup(); return storage; diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 6e00aac9c37..cd85f20fdc0 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -12,6 +12,10 @@ class Context; */ class ITableFunctionFileLike : public ITableFunction { +public: + bool needStructureHint() const override { return structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; @@ -20,6 +24,7 @@ protected: String format = "auto"; String structure = "auto"; String compression_method = "auto"; + ColumnsDescription structure_hint; private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 41ba2db5c33..18d7d8867e8 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -78,7 +78,7 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, auto global_context = context->getGlobalContext(); ExecutableSettings settings; settings.script_name = script_name; - settings.script_arguments = std::move(arguments); + settings.script_arguments = arguments; auto storage = StorageExecutable::create(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); storage->startup(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 192846f7f11..b09bb8b6ae1 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -41,6 +41,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); } + return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 1660cadab04..1cce6c4b292 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -168,6 +168,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context ColumnsDescription columns; if (s3_configuration->structure != "auto") columns = parseColumnsListFromString(s3_configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; StoragePtr 
storage = StorageS3::create( s3_uri, diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index bd0226e348a..06a327593b0 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -25,6 +25,10 @@ public: } bool hasStaticStructure() const override { return s3_configuration->structure != "auto"; } + bool needStructureHint() const override { return s3_configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + protected: StoragePtr executeImpl( const ASTPtr & ast_function, @@ -38,6 +42,7 @@ protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional s3_configuration; + ColumnsDescription structure_hint; }; class TableFunctionCOS : public TableFunctionS3 diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ed08972e74d..9e09fac665a 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -31,6 +31,10 @@ void registerTableFunctions() registerTableFunctionHDFSCluster(factory); #endif +#if USE_HIVE + registerTableFunctionHive(factory); +#endif + registerTableFunctionODBC(factory); registerTableFunctionJDBC(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 72ca185f656..e39d21cb580 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -29,6 +29,10 @@ void registerTableFunctionHDFS(TableFunctionFactory & factory); void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); #endif +#if USE_HIVE +void registerTableFunctionHive(TableFunctionFactory & factory); +#endif + void registerTableFunctionODBC(TableFunctionFactory & factory); void registerTableFunctionJDBC(TableFunctionFactory & factory); diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py index 2ba47b89535..44f8ca43339 100755 --- a/tests/ci/push_to_artifactory.py +++ b/tests/ci/push_to_artifactory.py @@ -8,6 +8,7 @@ from typing import Tuple from artifactory import ArtifactorySaaSPath # type: ignore from build_download_helper import dowload_build_with_progress +from env_helper import RUNNER_TEMP from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix @@ -19,7 +20,7 @@ def getenv(name: str, default: str = None): raise KeyError(f"Necessary {name} environment is not set") -TEMP_PATH = getenv("TEMP_PATH", ".") +TEMP_PATH = os.path.join(RUNNER_TEMP, "push_to_artifactory") # One of the following ENVs is necessary JFROG_API_KEY = getenv("JFROG_API_KEY", "") JFROG_TOKEN = getenv("JFROG_TOKEN", "") @@ -45,11 +46,11 @@ class Packages: for name, arch in self.packages ) - self.tgz = tuple("{}-{}.tgz".format(name, version) for name, _ in self.packages) + self.tgz = tuple(f"{name}-{version}.tgz" for name, _ in self.packages) def arch(self, deb_pkg: str) -> str: if deb_pkg not in self.deb: - raise ValueError("{} not in {}".format(deb_pkg, self.deb)) + raise ValueError(f"{deb_pkg} not in {self.deb}") return removesuffix(deb_pkg, ".deb").split("_")[-1] @staticmethod diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 111222e90af..121a283d0e4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -337,6 +337,26 @@ class FailureReason(enum.Enum): INTERNAL_ERROR = "Test internal error: " +class SettingsRandomizer: + settings = { + 
"max_insert_threads": lambda: 0 if random.random() < 0.5 else random.randint(1, 16), + "group_by_two_level_threshold": lambda: 1 if random.random() < 0.1 else 2 ** 60 if random.random() < 0.11 else 100000, + "group_by_two_level_threshold_bytes": lambda: 1 if random.random() < 0.1 else 2 ** 60 if random.random() < 0.11 else 50000000, + "distributed_aggregation_memory_efficient": lambda: random.randint(0, 1), + "fsync_metadata": lambda: random.randint(0, 1), + "priority": lambda: int(abs(random.gauss(0, 2))), + "output_format_parallel_formatting": lambda: random.randint(0, 1), + "input_format_parallel_parsing": lambda: random.randint(0, 1), + } + + @staticmethod + def get_random_settings(): + random_settings = [] + for setting, generator in SettingsRandomizer.settings.items(): + random_settings.append(setting + "=" + str(generator()) + "") + return random_settings + + class TestResult: def __init__(self, case_name: str, status: TestStatus, reason: Optional[FailureReason], total_time: float, description: str): self.case_name: str = case_name @@ -417,6 +437,29 @@ class TestCase: return testcase_args + def add_random_settings(self, client_options): + if self.tags and 'no-random-settings' in self.tags: + return client_options + + if len(self.base_url_params) == 0: + os.environ['CLICKHOUSE_URL_PARAMS'] = '&'.join(self.random_settings) + else: + os.environ['CLICKHOUSE_URL_PARAMS'] = self.base_url_params + '&' + '&'.join(self.random_settings) + + new_options = " --allow_repeated_settings --" + " --".join(self.random_settings) + os.environ['CLICKHOUSE_CLIENT_OPT'] = self.base_client_options + new_options + ' ' + return client_options + new_options + + def remove_random_settings_from_env(self): + os.environ['CLICKHOUSE_URL_PARAMS'] = self.base_url_params + os.environ['CLICKHOUSE_CLIENT_OPT'] = self.base_client_options + + def add_info_about_settings(self, description): + if self.tags and 'no-random-settings' in self.tags: + return description + + return description + "\n" + "Settings used in the test: " + "--" + " --".join(self.random_settings) + "\n" + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() @@ -432,6 +475,10 @@ class TestCase: self.testcase_args = None self.runs_count = 0 + self.random_settings = SettingsRandomizer.get_random_settings() + self.base_url_params = os.environ['CLICKHOUSE_URL_PARAMS'] if 'CLICKHOUSE_URL_PARAMS' in os.environ else '' + self.base_client_options = os.environ['CLICKHOUSE_CLIENT_OPT'] if 'CLICKHOUSE_CLIENT_OPT' in os.environ else '' + # should skip test, should increment skipped_total, skip reason def should_skip_test(self, suite) -> Optional[FailureReason]: tags = self.tags @@ -673,10 +720,13 @@ class TestCase: self.runs_count += 1 self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path) + client_options = self.add_random_settings(client_options) proc, stdout, stderr, total_time = self.run_single_test(server_logs_level, client_options) result = self.process_result_impl(proc, stdout, stderr, total_time) result.check_if_need_retry(args, stdout, stderr, self.runs_count) + if result.status == TestStatus.FAIL: + result.description = self.add_info_about_settings(result.description) return result except KeyboardInterrupt as e: raise e @@ -684,17 +734,20 @@ class TestCase: return TestResult(self.name, TestStatus.FAIL, FailureReason.INTERNAL_QUERY_FAIL, 0., - 
self.get_description_from_exception_info(sys.exc_info())) + self.add_info_about_settings(self.get_description_from_exception_info(sys.exc_info()))) except (ConnectionRefusedError, ConnectionResetError): return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., - self.get_description_from_exception_info(sys.exc_info())) + self.add_info_about_settings(self.get_description_from_exception_info(sys.exc_info()))) except: return TestResult(self.name, TestStatus.UNKNOWN, FailureReason.INTERNAL_ERROR, 0., self.get_description_from_exception_info(sys.exc_info())) + finally: + self.remove_random_settings_from_env() + class TestSuite: @staticmethod @@ -1078,11 +1131,15 @@ def collect_build_flags(args): if value == 0: result.append(BuildFlags.POLYMORPHIC_PARTS) - use_flags = clickhouse_execute(args, "SELECT name FROM system.build_options WHERE name like 'USE_%' AND value in ('ON', '1');") + use_flags = clickhouse_execute(args, "SELECT name FROM system.build_options WHERE name like 'USE_%' AND value in ('ON', '1')") for use_flag in use_flags.strip().splitlines(): use_flag = use_flag.decode().lower() result.append(use_flag) + system_processor = clickhouse_execute(args, "SELECT value FROM system.build_options WHERE name = 'SYSTEM_PROCESSOR' LIMIT 1").strip() + if system_processor: + result.append(f'cpu-{system_processor.decode().lower()}') + return result diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index ebd0f6bd907..0abe74e31a3 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -1,8 +1,9 @@ import pytest +import uuid from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance') +instance = cluster.add_instance('instance', stay_alive=True) @pytest.fixture(scope="module", autouse=True) @@ -14,7 +15,8 @@ def started_cluster(): finally: cluster.shutdown() -def test_access_rights_for_funtion(): + +def test_access_rights_for_function(): create_function_query = "CREATE FUNCTION MySum AS (a, b) -> a + b" instance.query("CREATE USER A") @@ -37,3 +39,19 @@ def test_access_rights_for_funtion(): instance.query("DROP USER IF EXISTS A") instance.query("DROP USER IF EXISTS B") + + +def test_ignore_obsolete_grant_on_database(): + instance.stop_clickhouse() + + user_id = uuid.uuid4() + instance.exec_in_container(["bash", "-c" , f""" + cat > /var/lib/clickhouse/access/{user_id}.sql << EOF +ATTACH USER X; +ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X; +EOF"""]) + + instance.exec_in_container(["bash", "-c" , "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]) + instance.start_clickhouse() + + assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n" diff --git a/tests/integration/test_distributed_ddl_on_cross_replication/test.py b/tests/integration/test_distributed_ddl_on_cross_replication/test.py index 833a3fb1f04..b61bfc5d83f 100644 --- a/tests/integration/test_distributed_ddl_on_cross_replication/test.py +++ b/tests/integration/test_distributed_ddl_on_cross_replication/test.py @@ -104,3 +104,11 @@ def test_atomic_database(started_cluster): node1.query("INSERT INTO replica_1.rmt VALUES (1, 'test')") node2.query("SYSTEM SYNC REPLICA replica_2.rmt", timeout=5) assert_eq_with_retry(node2, "SELECT * FROM replica_2.rmt", '1\ttest') + +def test_non_query_with_table_ddl(started_cluster): + node1.query("CREATE USER A ON CLUSTER cross_3shards_2replicas") + + 
assert node1.query("SELECT 1", user='A') == "1\n" + assert node2.query("SELECT 1", user='A') == "1\n" + + node2.query("DROP USER A ON CLUSTER cross_3shards_2replicas") diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a68ae0b066d..9997457e93d 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -30,7 +30,19 @@ def test_create_parquet_table(started_cluster): node.query("set input_format_parquet_allow_missing_columns = true") result = node.query(""" DROP TABLE IF EXISTS default.demo_parquet; - CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day); + """) + logging.info("create result {}".format(result)) + time.sleep(120) + assert result.strip() == '' + +def test_create_parquet_table_1(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" + DROP TABLE IF EXISTS default.demo_parquet_parts; + CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); """) logging.info("create result {}".format(result)) time.sleep(120) @@ -70,6 +82,17 @@ def test_parquet_groupby(started_cluster): 2021-11-16 2 """ assert result == expected_result + +def test_parquet_in_filter(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') + """) + expected_result = """2 +""" + logging.info("query result:{}".format(result)) + assert result == expected_result def test_orc_groupby(started_cluster): logging.info('Start testing groupby ...') node = started_cluster.instances['h0_0_0'] @@ -107,6 +130,20 @@ def test_parquet_groupby_with_cache(started_cluster): 2021-11-16 2 """ assert result == expected_result + +def test_parquet_groupby_by_hive_function(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result + def test_cache_read_bytes(started_cluster): node = started_cluster.instances['h0_0_0'] node.query("set input_format_parquet_allow_missing_columns = true") @@ -129,4 +166,5 @@ def test_cache_read_bytes(started_cluster): assert result == expected_result result = node.query("select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0") logging.info("Read bytes from cache:{}".format(result)) + assert result.strip() != '0' diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py 
b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index 377a48be7ed..0f66fd5dcdf 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -1183,3 +1183,39 @@ def materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, m "\t2021\t3020399000000\t3020399000000\t00000000010100000000000000000000000000000000000000\t10\t1\t11\tvarbinary\tRED\n" + "2\t2\t22\t9223372036854775807\t-2\t2\t22\t18446744073709551615\t-2.2\t2.2\t-2.22\t2.222\t2.2222\t2021-10-07\ttext\tvarchar\tBLOB\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786" + "\t2021\t-3020399000000\t-46798000001\t000000000101000000D55C6E30D4095E40DCF0BBE996493E40\t11\t3\t22\tvarbinary\tGREEN\n") + + +def materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database") + mysql_node.query("CREATE TABLE test_database.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("INSERT INTO test_database.a VALUES(1, 'foo')") + mysql_node.query("INSERT INTO test_database.a VALUES(2, 'bar')") + # table b(include json type, not in materialized_mysql_tables_list) can be skip + mysql_node.query("CREATE TABLE test_database.b (id INT(11) NOT NULL PRIMARY KEY, value JSON)") + + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MaterializedMySQL('{}:3306', 'test_database', 'root', 'clickhouse') SETTINGS materialized_mysql_tables_list = ' a,c,d'".format(service_name)) + + check_query(clickhouse_node, "SELECT name from system.tables where database = 'test_database' FORMAT TSV", "a\n") + check_query(clickhouse_node, "SELECT COUNT() FROM test_database.a FORMAT TSV", "2\n") + + # mysql data(binlog) can be skip + mysql_node.query("INSERT INTO test_database.b VALUES(1, '{\"name\":\"testjson\"}')") + mysql_node.query("INSERT INTO test_database.b VALUES(2, '{\"name\":\"testjson\"}')") + + # irrelevant database can be skip + mysql_node.query("DROP DATABASE IF EXISTS other_database") + mysql_node.query("CREATE DATABASE other_database") + mysql_node.query("CREATE TABLE other_database.d (id INT(11) NOT NULL PRIMARY KEY, value json)") + mysql_node.query("INSERT INTO other_database.d VALUES(1, '{\"name\":\"testjson\"}')") + + mysql_node.query("CREATE TABLE test_database.c (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("INSERT INTO test_database.c VALUES(1, 'foo')") + mysql_node.query("INSERT INTO test_database.c VALUES(2, 'bar')") + + check_query(clickhouse_node, "SELECT name from system.tables where database = 'test_database' FORMAT TSV", "a\nc\n") + check_query(clickhouse_node, "SELECT COUNT() FROM test_database.c FORMAT TSV", "2\n") + + clickhouse_node.query("DROP DATABASE test_database") + mysql_node.query("DROP DATABASE test_database") \ No newline at end of file diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 501c0cd78fa..3f9d4d5ce40 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -257,3 +257,7 @@ def test_table_overrides(started_cluster, started_mysql_8_0, started_mysql_5_7, def 
test_materialized_database_support_all_kinds_of_mysql_datatype(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_8_0, "mysql80") materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_5_7, "mysql57") + +def test_materialized_database_settings_materialized_mysql_tables_list(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_5_7, "mysql57") diff --git a/tests/integration/test_reload_certificate/configs/ECcert.crt b/tests/integration/test_reload_certificate/configs/ECcert.crt new file mode 100644 index 00000000000..b87ce0099dc --- /dev/null +++ b/tests/integration/test_reload_certificate/configs/ECcert.crt @@ -0,0 +1,16 @@ +-----BEGIN CERTIFICATE----- +MIICkzCCAhigAwIBAgIUcrahhUuSDdw60Wyfo2E4kVUWWQ8wCgYIKoZIzj0EAwIw +fzELMAkGA1UEBhMCQ04xEzARBgNVBAgMClNvbWUtU3RhdGUxDTALBgNVBAcMBGNp +dHkxEDAOBgNVBAoMB2NvbXBhbnkxEDAOBgNVBAsMB3NlY3Rpb24xEjAQBgNVBAMM +CWxvY2FsaG9zdDEUMBIGCSqGSIb3DQEJARYFZW1haWwwIBcNMjIwMjI3MTg1NzQz +WhgPMjEyMjAyMDMxODU3NDNaMH8xCzAJBgNVBAYTAkNOMRMwEQYDVQQIDApTb21l +LVN0YXRlMQ0wCwYDVQQHDARjaXR5MRAwDgYDVQQKDAdjb21wYW55MRAwDgYDVQQL +DAdzZWN0aW9uMRIwEAYDVQQDDAlsb2NhbGhvc3QxFDASBgkqhkiG9w0BCQEWBWVt +YWlsMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEgoPY/r89/83zHzmpbsA+kW3YflVQ +tKXO8Kl7ki5q+v1qUu3xmr4HttNxvHLOCfK798KMGg9y+NO5y4D4D2ZgLGxkNt8X +yWvhkbe3xKdGSqBpplbLT+M9FtmQ6tzzzFJVo1MwUTAdBgNVHQ4EFgQUmpLPeJBD +ID5s1AeWsVIEt6Z/ca0wHwYDVR0jBBgwFoAUmpLPeJBDID5s1AeWsVIEt6Z/ca0w +DwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAgNpADBmAjEAv4uNU4NgprBgNQxk +fIZpJCf/TpisuVsLUHXl8JrMVKKVUf7zr59GH2yiOoukfD5hAjEAlCohSA6/Ken4 +JWkKPCrfnsBZ7VX8Y+4ZqLKuG+IGAu2vQTg+Jc6M23M1vEgi1dqf +-----END CERTIFICATE----- diff --git a/tests/integration/test_reload_certificate/configs/ECcert.key b/tests/integration/test_reload_certificate/configs/ECcert.key new file mode 100644 index 00000000000..b127f8a53fe --- /dev/null +++ b/tests/integration/test_reload_certificate/configs/ECcert.key @@ -0,0 +1,6 @@ +-----BEGIN PRIVATE KEY----- +MIG2AgEAMBAGByqGSM49AgEGBSuBBAAiBIGeMIGbAgEBBDAJbfB78wfRHn5A4x3e +EAqrFk/hbBD+c8snbFgjQqxg4qTcp154Rc01B9V0US27MJuhZANiAASCg9j+vz3/ +zfMfOaluwD6Rbdh+VVC0pc7wqXuSLmr6/WpS7fGavge203G8cs4J8rv3wowaD3L4 +07nLgPgPZmAsbGQ23xfJa+GRt7fEp0ZKoGmmVstP4z0W2ZDq3PPMUlU= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_reload_certificate/test.py b/tests/integration/test_reload_certificate/test.py index dc0c391d6f0..d37fd1bccbc 100644 --- a/tests/integration/test_reload_certificate/test.py +++ b/tests/integration/test_reload_certificate/test.py @@ -6,6 +6,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node', main_configs=["configs/first.crt", "configs/first.key", "configs/second.crt", "configs/second.key", + "configs/ECcert.crt", "configs/ECcert.key", "configs/cert.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -73,3 +74,27 @@ def test_first_than_second_cert(): assert False except: assert True + +def test_ECcert_reload(): + # Set first key + change_config_to_key('first') + + # Command with correct certificate + assert node.exec_in_container(['curl', '--silent', '--cacert', 
'/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='first'), + 'https://localhost:8443/']) == 'Ok.\n' + + # Change to other key + change_config_to_key('ECcert') + + # Command with correct certificate + assert node.exec_in_container(['curl', '--silent', '--cacert', '/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='ECcert'), + 'https://localhost:8443/']) == 'Ok.\n' + + # Command with wrong certificate + # Same as previous + try: + node.exec_in_container(['curl', '--silent', '--cacert', '/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='first'), + 'https://localhost:8443/']) + assert False + except: + assert True diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 82b06ade6be..4e848dc2915 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -421,6 +421,16 @@ def test_schema_inference_with_globs(started_cluster): assert(sorted(result.split()) == ['0', '\\N']) +def test_insert_select_schema_inference(started_cluster): + node1.query(f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x") + + result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/test.native.zst')") + assert(result.strip() == 'x\tUInt64') + + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test.native.zst')") + assert(int(result) == 1) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 55be61e052b..854a1021b99 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -3,6 +3,7 @@ import pytest from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster +from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/named_collections.xml'], with_postgres=True) @@ -186,55 +187,65 @@ def test_non_default_scema(started_cluster): def test_concurrent_queries(started_cluster): - cursor = started_cluster.postgres_conn.cursor() - - node1.query(''' - CREATE TABLE test_table (key UInt32, value UInt32) - ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword')''') + conn = get_postgres_conn(started_cluster.postgres_ip, started_cluster.postgres_port, database=False) + cursor = conn.cursor() + database_name = 'concurrent_test' + cursor.execute(f'DROP DATABASE IF EXISTS {database_name}') + cursor.execute(f'CREATE DATABASE {database_name}') + conn = get_postgres_conn(started_cluster.postgres_ip, started_cluster.postgres_port, database=True, database_name=database_name) + cursor = conn.cursor() cursor.execute('CREATE TABLE test_table (key integer, value integer)') - prev_count = node1.count_in_log('New connection to postgres1:5432') + node1.query(f''' + CREATE TABLE test.test_table (key UInt32, value UInt32) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='test_table') + ''') + + node1.query(f''' + CREATE TABLE test.stat (numbackends UInt32, datname String) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='pg_stat_database') + ''') + def node_select(_): for i in range(20): - result = node1.query("SELECT * FROM test_table", user='default') - busy_pool = Pool(20) - p = busy_pool.map_async(node_select, range(20)) - p.wait() 
- count = node1.count_in_log('New connection to postgres1:5432') - logging.debug(f'count {count}, prev_count {prev_count}') - # 16 is default size for connection pool - assert(int(count) <= int(prev_count) + 16) + result = node1.query("SELECT * FROM test.test_table", user='default') def node_insert(_): - for i in range(5): - result = node1.query("INSERT INTO test_table SELECT number, number FROM numbers(1000)", user='default') - - busy_pool = Pool(5) - p = busy_pool.map_async(node_insert, range(5)) - p.wait() - result = node1.query("SELECT count() FROM test_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000) + for i in range(20): + result = node1.query("INSERT INTO test.test_table SELECT number, number FROM numbers(1000)", user='default') def node_insert_select(_): - for i in range(5): - result = node1.query("INSERT INTO test_table SELECT number, number FROM numbers(1000)", user='default') - result = node1.query("SELECT * FROM test_table LIMIT 100", user='default') + for i in range(20): + result = node1.query("INSERT INTO test.test_table SELECT number, number FROM numbers(1000)", user='default') + result = node1.query("SELECT * FROM test.test_table LIMIT 100", user='default') - busy_pool = Pool(5) - p = busy_pool.map_async(node_insert_select, range(5)) + busy_pool = Pool(30) + p = busy_pool.map_async(node_select, range(30)) p.wait() - result = node1.query("SELECT count() FROM test_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000 * 2) - node1.query('DROP TABLE test_table;') - cursor.execute('DROP TABLE test_table;') + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) - count = node1.count_in_log('New connection to postgres1:5432') - logging.debug(f'count {count}, prev_count {prev_count}') - assert(int(count) <= int(prev_count) + 16) + busy_pool = Pool(30) + p = busy_pool.map_async(node_insert, range(30)) + p.wait() + + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) + + busy_pool = Pool(30) + p = busy_pool.map_async(node_insert_select, range(30)) + p.wait() + + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) + + node1.query('DROP TABLE test.test_table;') + node1.query('DROP TABLE test.stat;') def test_postgres_distributed(started_cluster): diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 7fb880119a7..b2c7bbc1510 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -818,8 +818,9 @@ def test_seekable_formats(started_cluster): instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM s3') AND memory_usage > 0 ORDER BY event_time desc") - print(result[:3]) - assert(int(result[:3]) < 200) + + result = result[:result.index('.')] + assert(int(result) < 200) def test_seekable_formats_url(started_cluster): @@ -842,8 +843,9 @@ def test_seekable_formats_url(started_cluster): instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM url') AND memory_usage > 0 ORDER BY event_time desc") - print(result[:3]) - assert(int(result[:3]) < 200) + + result = 
result[:result.index('.')] + assert(int(result) < 200) def test_empty_file(started_cluster): @@ -886,7 +888,7 @@ def test_s3_schema_inference(started_cluster): result = instance.query(f"select count(*) from schema_inference") assert(int(result) == 5000000) - + table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_native', 'Native')" result = instance.query(f"desc {table_function}") assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n" @@ -949,7 +951,7 @@ def test_create_new_files_on_insert(started_cluster): instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(10) settings s3_truncate_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(20) settings s3_create_new_file_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(30) settings s3_create_new_file_on_insert=1") - + result = instance.query(f"select count() from test_multiple_inserts") assert(int(result) == 60) @@ -961,11 +963,11 @@ def test_create_new_files_on_insert(started_cluster): instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(10) settings s3_truncate_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(20) settings s3_create_new_file_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(30) settings s3_create_new_file_on_insert=1") - + result = instance.query(f"select count() from test_multiple_inserts") assert(int(result) == 60) - + def test_format_detection(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -1038,3 +1040,37 @@ def test_signatures(started_cluster): result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', 'Arrow')") assert(int(result) == 1) + +def test_select_columns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + name = "test_table2" + structure = "id UInt32, value1 Int32, value2 Int32" + + instance.query(f"drop table if exists {name}") + instance.query(f"CREATE TABLE {name} ({structure}) ENGINE = S3(s3_conf1, format='Parquet')") + + limit = 10000000 + instance.query(f"INSERT INTO {name} SELECT * FROM generateRandom('{structure}') LIMIT {limit} SETTINGS s3_truncate_on_insert=1") + instance.query(f"SELECT value2 FROM {name}") + + instance.query("SYSTEM FLUSH LOGS") + result1 = instance.query(f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'") + + instance.query(f"SELECT * FROM {name}") + instance.query("SYSTEM FLUSH LOGS") + result2 = instance.query(f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'") + + assert(int(result1) * 3 <= int(result2)) + + +def test_insert_select_schema_inference(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query(f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow') select toUInt64(1) as x settings s3_truncate_on_insert=1") + result = instance.query(f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + 
assert(result.strip() == 'x\tUInt64') + + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + assert(int(result) == 1) diff --git a/tests/integration/test_timezone_config/test.py b/tests/integration/test_timezone_config/test.py index ac12eddc709..af7e3548e6a 100644 --- a/tests/integration/test_timezone_config/test.py +++ b/tests/integration/test_timezone_config/test.py @@ -17,3 +17,25 @@ def start_cluster(): def test_check_timezone_config(start_cluster): assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n" + +def test_overflow_toDate(start_cluster): + assert node.query("SELECT toDate('2999-12-31','UTC')") == "2149-06-06\n" + assert node.query("SELECT toDate('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate('1000-12-31','UTC')") == "1970-01-01\n" + +def test_overflow_toDate32(start_cluster): + assert node.query("SELECT toDate32('2999-12-31','UTC')") == "2283-11-11\n" + assert node.query("SELECT toDate32('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate32('1000-12-31','UTC')") == "1925-01-01\n" + +def test_overflow_toDateTime(start_cluster): + assert node.query("SELECT toDateTime('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT toDateTime('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + +def test_overflow_parseDateTimeBestEffort(start_cluster): + assert node.query("SELECT parseDateTimeBestEffort('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT parseDateTimeBestEffort('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" diff --git a/tests/integration/test_user_zero_database_access/configs/users.xml b/tests/integration/test_user_zero_database_access/configs/users.xml index 8c8dfbb5b7e..25c598aa560 100644 --- a/tests/integration/test_user_zero_database_access/configs/users.xml +++ b/tests/integration/test_user_zero_database_access/configs/users.xml @@ -37,6 +37,24 @@ db1 + + + clickhouse + + ::/0 + + default + default + + + + + + ::/0 + + default + default + diff --git a/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py b/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py index dd3789cde57..d77e8383df7 100644 --- a/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py +++ b/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py @@ -70,3 +70,17 @@ def test_user_zero_database_access(start_cluster): ["bash", "-c", "/usr/bin/clickhouse client --user 'default' --query 'DROP DATABASE test2'"], user='root') except Exception as ex: assert False, "user with full access rights can't drop database test2" + + try: + name = node.exec_in_container( + ["bash", "-c", "export CLICKHOUSE_USER=env_user_not_with_password && /usr/bin/clickhouse client --query 'SELECT currentUser()'"], user='root') + assert name.strip() == "env_user_not_with_password" + except Exception as ex: + assert False, "set env CLICKHOUSE_USER can not connect server" + + 
try: + name = node.exec_in_container( + ["bash", "-c", "export CLICKHOUSE_USER=env_user_with_password && export CLICKHOUSE_PASSWORD=clickhouse && /usr/bin/clickhouse client --query 'SELECT currentUser()'"], user='root') + assert name.strip() == "env_user_with_password" + except Exception as ex: + assert False, "set env CLICKHOUSE_USER CLICKHOUSE_PASSWORD can not connect server" diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index f9cbf92db41..e29a166c1ee 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,5 +1,6 @@ -- Tags: replica, distributed +SET allow_experimental_parallel_reading_from_replicas = 0; SET max_parallel_replicas = 2; DROP TABLE IF EXISTS report; diff --git a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql index 16356046a36..c54593056cf 100644 --- a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql +++ b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_rows_to_read = 1000000; SET read_overflow_mode = 'break'; SELECT concat(toString(number % 256 AS n), '') AS s, n, max(s) FROM system.numbers_mt GROUP BY s, n, n, n, n, n, n, n, n, n ORDER BY s, n; diff --git a/tests/queries/0_stateless/00474_readonly_settings.reference b/tests/queries/0_stateless/00474_readonly_settings.reference index b1da40ce414..e2b45931965 100644 --- a/tests/queries/0_stateless/00474_readonly_settings.reference +++ b/tests/queries/0_stateless/00474_readonly_settings.reference @@ -2,13 +2,11 @@ "value": 4611686018427387904 "name": "value", "value": "4611686018427387904" -value -value -Cannot modify 'output_format_json_quote_64bit_integers' setting in readonly mode +OK +OK "name": "value", "value": "9223372036854775808" "name": "value", "value": 9223372036854775808 -value -value -Cannot modify 'output_format_json_quote_64bit_integers' setting in readonly mode +OK +OK diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 0887ecfa14e..07b78c64a7e 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -9,13 +9,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=0 | grep value $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=1 | grep value -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' +$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in 
readonly mode' && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' +#${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode.' && echo "OK" || echo "FAIL" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode.' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' diff --git a/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/tests/queries/0_stateless/00808_not_optimize_predicate.sql index 7c1e57706e2..ba8f5eb5753 100644 --- a/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -1,4 +1,5 @@ SET send_logs_level = 'fatal'; +SET convert_query_to_cnf = 0; DROP TABLE IF EXISTS test_00808; CREATE TABLE test_00808(date Date, id Int8, name String, value Int64, sign Int8) ENGINE = CollapsingMergeTree(sign) ORDER BY (id, date); diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/tests/queries/0_stateless/00826_cross_to_inner_join.sql index 392ade02ab7..ce0c8ea2bfc 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.sql +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -1,4 +1,6 @@ SET enable_optimize_predicate_expression = 0; +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; select * from system.one l cross join system.one r; diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index 58535f556d9..eabede3ff00 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -1,4 +1,5 @@ SET enable_optimize_predicate_expression = 0; +SET convert_query_to_cnf = 0; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql index 0d82519e4d3..555e7a98380 100644 --- a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql +++ 
b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql @@ -1,5 +1,7 @@ -- Tags: shard +SET prefer_localhost_replica = 1; + SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 198 } SELECT count() FROM remote('127.0.0.1|localhos', system.one); diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 24e4241b813..b697bd56800 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 -- Tag no-fasttest: Not sure why fail even in sequential mode. Disabled for now to make some progress. SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index af747c93678..7804ce32a5a 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-random-settings SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.reference index 3a668e6dcdb..d4fdcfea6a5 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.reference +++ b/tests/queries/0_stateless/01016_simhash_minhash.reference @@ -50,92 +50,92 @@ (14260447771268573594,3863279269132177973) uniqExact 6 ngramSimHash -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2857686823 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2857686823 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567843 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 ngramSimHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891240999 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3908359975 ngramSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2924795687 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3897874215 ngramSimHashCaseInsensitiveUTF8 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891241255 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 wordShingleSimHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. 
All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 wordShingleSimHashCaseInsensitive -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 wordShingleSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 wordShingleSimHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ngramMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ngramMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ngramMinHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ngramMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) wordShingleMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) wordShingleMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) wordShingleMinHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -wordShingleMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +wordShingleMinHashCaseInsensitiveUTF8 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql index 01af9451381..1e77b487851 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ b/tests/queries/0_stateless/01016_simhash_minhash.sql @@ -75,38 +75,38 @@ SELECT 'uniqExact', uniqExact(s) FROM defaults; SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults 
GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } SELECT wordShingleSimHash('foobar', 1001); -- { 
serverError 69 } diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index d47dc6b8d5f..69bd15e3f54 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-msan, no-replicated-database +-- Tags: no-tsan, no-asan, no-msan, no-replicated-database, no-random-settings -- Tag no-tsan: Fine thresholds on memory usage -- Tag no-asan: Fine thresholds on memory usage -- Tag no-msan: Fine thresholds on memory usage @@ -7,6 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 +SET use_uncompressed_cache = 0; + -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; SET max_memory_usage = 4000000; @@ -19,6 +21,8 @@ SELECT 'UInt64'; SET max_memory_usage = 4000000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; + + SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); SELECT 'K=16'; diff --git a/tests/queries/0_stateless/01020_function_array_compact.sql b/tests/queries/0_stateless/01020_function_array_compact.sql index d4aaa4d3fca..29adb007dc4 100644 --- a/tests/queries/0_stateless/01020_function_array_compact.sql +++ b/tests/queries/0_stateless/01020_function_array_compact.sql @@ -7,5 +7,5 @@ select arrayCompact([1,1,2]); select arrayCompact([1,2,1]); select arrayCompact([2,1,1]); select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]); -SELECT arrayCompact(x->0, [NULL]); -SELECT toString(arrayCompact(x->0, [NULL])); +SELECT arrayCompact(arrayMap(x->0, [NULL])); +SELECT toString(arrayCompact(arrayMap(x->0, [NULL]))); diff --git a/tests/queries/0_stateless/01025_array_compact_generic.reference b/tests/queries/0_stateless/01025_array_compact_generic.reference index d95e269cd3f..572c7ee140c 100644 --- a/tests/queries/0_stateless/01025_array_compact_generic.reference +++ b/tests/queries/0_stateless/01025_array_compact_generic.reference @@ -15,3 +15,6 @@ ['0','1','2'] ['0','1','2'] ['0','1','2'] +[(0,0),(3,1),(6,2),(9,0)] +[('0','0'),('3','1'),('6','2'),('9','0')] +[('0',0),('3',1),('6',2),('9',0)] diff --git a/tests/queries/0_stateless/01025_array_compact_generic.sql b/tests/queries/0_stateless/01025_array_compact_generic.sql index bea39bfbd44..4446d10e9d4 100644 --- a/tests/queries/0_stateless/01025_array_compact_generic.sql +++ b/tests/queries/0_stateless/01025_array_compact_generic.sql @@ -5,4 +5,7 @@ SELECT arrayCompact([1, 1, NULL, NULL, 2, 2, 2]); SELECT arrayCompact([1, 1, NULL, NULL, nan, nan, 2, 2, 2]); SELECT arrayCompact(['hello', '', '', '', 'world', 'world']); SELECT arrayCompact([[[]], [[], []], [[], []], [[]]]); -SELECT arrayCompact(x -> toString(intDiv(x, 3)), range(number)) FROM numbers(10); +SELECT arrayCompact(arrayMap(x -> toString(intDiv(x, 3)), range(number))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((number, intDiv(number, 3) % 3))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((toString(number), toString(intDiv(number, 3) % 3)))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((toString(number), intDiv(number, 3) % 3))) FROM numbers(10); diff --git 
a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 4eea4fd47c7..6d1c7fd5ef6 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -1,5 +1,7 @@ -- Tags: replica, distributed +set allow_experimental_parallel_reading_from_replicas=0; + drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index b784b35cbb3..a81fef645db 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +set allow_experimental_parallel_reading_from_replicas = 0; + drop table if exists sample_final; create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index c055971bef3..947b68c3a89 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -1,34 +1,34 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1))`\nORDER BY `windowID(timestamp, toIntervalSecond(1))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, 
Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`plus(a, b)`, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, 
toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`plus(a, b)`, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql index d59b8fc30ac..6d2bb2964d6 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql @@ -1,5 +1,6 @@ SET enable_optimize_predicate_expression = 1; SET joined_subquery_requires_alias = 0; +SET convert_query_to_cnf = 0; -- https://github.com/ClickHouse/ClickHouse/issues/3885 -- https://github.com/ClickHouse/ClickHouse/issues/5485 diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference index 55c1ee45827..de722f47f08 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference @@ -1,3 +1,7 @@ -3 1990-01-01 12:00:05 -2 1990-01-01 12:00:10 -2 1990-01-01 12:00:15 +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh index e570f405f62..9163fe8af27 100755 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh @@ -10,25 +10,25 @@ DROP TABLE IF EXISTS mt; DROP TABLE IF EXISTS dst; DROP TABLE IF EXISTS wv; -CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); -CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE TABLE dst(count UInt64, market Int32, w_end DateTime) Engine=MergeTree ORDER BY tuple(); +CREATE TABLE mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market; -INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:02'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:05'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:06'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); +INSERT INTO mt VALUES (1, 1, '1990/01/01 12:00:00'); +INSERT INTO mt VALUES (1, 2, '1990/01/01 12:00:01'); +INSERT INTO mt VALUES (1, 3, '1990/01/01 12:00:02'); +INSERT INTO mt VALUES (1, 4, '1990/01/01 12:00:05'); +INSERT INTO mt VALUES (1, 5, '1990/01/01 12:00:06'); 
+INSERT INTO mt VALUES (1, 6, '1990/01/01 12:00:10'); +INSERT INTO mt VALUES (1, 7, '1990/01/01 12:00:11'); +INSERT INTO mt VALUES (1, 8, '1990/01/01 12:00:30'); EOF while true; do - $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "3" && break || sleep .5 ||: + $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "7" && break || sleep .5 ||: done -$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY w_end;" +$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;" $CLICKHOUSE_CLIENT --query="DROP TABLE wv" $CLICKHOUSE_CLIENT --query="DROP TABLE mt" $CLICKHOUSE_CLIENT --query="DROP TABLE dst" diff --git a/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql b/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql index 644190cbddf..6ec6e80692c 100644 --- a/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql +++ b/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql @@ -1,3 +1,5 @@ +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS n; DROP TABLE IF EXISTS r; diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index 9d042860ac0..3869bf941c0 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-cpu-aarch64 SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index 4e011bf6b31..de93166d891 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -2,6 +2,8 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 +SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 + DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; DROP TABLE IF EXISTS distributed_01099_a; diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh index da99a13e97f..9b6e1e05f2d 100755 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 # Tag no-fasttest: avoid dependency on qemu -- invonvenient when running locally CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 6b0b0773cb6..7cefac28e22 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-s3-storage +# FIXME: s3 storage should work OK, but this test +# reproduces a bug which exists not only in the S3 version.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 0b672cbddbf..e0546ec8117 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET prefer_localhost_replica = 1; + DROP DATABASE IF EXISTS test_01155_ordinary; DROP DATABASE IF EXISTS test_01155_atomic; diff --git a/tests/queries/0_stateless/01186_conversion_to_nullable.reference b/tests/queries/0_stateless/01186_conversion_to_nullable.reference index 7a690240eb5..dc77029ec3b 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.reference +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.reference @@ -12,7 +12,7 @@ \N 1970-01-01 \N -1970-01-01 +2149-06-06 2020-12-24 01:02:03 \N 1970-01-01 03:00:00 diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index ec07f4d3687..dacb609d790 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings unset CLICKHOUSE_LOG_COMMENT diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql index 73b87817bb3..242a253e67c 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql @@ -1,4 +1,5 @@ set optimize_arithmetic_operations_in_aggregate_functions = 1; +SET convert_query_to_cnf = 0; explain syntax select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql b/tests/queries/0_stateless/01275_parallel_mv.sql index 32b43ce616f..11e5ff41417 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql +++ b/tests/queries/0_stateless/01275_parallel_mv.sql @@ -1,3 +1,5 @@ +set max_threads = 0; + drop table if exists testX; drop table if exists testXA; drop table if exists testXB; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index bf201187f45..c9c01455e31 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01293_show_settings.sql b/tests/queries/0_stateless/01293_show_settings.sql index 08f00ed201c..3e55ffb58d7 100644 --- a/tests/queries/0_stateless/01293_show_settings.sql +++ b/tests/queries/0_stateless/01293_show_settings.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + show settings like 
'send_timeout'; SHOW SETTINGS ILIKE '%CONNECT_timeout%'; SHOW CHANGED SETTINGS ILIKE '%MEMORY%'; diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index 34158fb081c..9997f18f61d 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel +set prefer_localhost_replica = 1; drop table if exists null_01293; drop table if exists dist_01293; diff --git a/tests/queries/0_stateless/01300_group_by_other_keys.sql b/tests/queries/0_stateless/01300_group_by_other_keys.sql index 22cff012e71..0e37ef55a6a 100644 --- a/tests/queries/0_stateless/01300_group_by_other_keys.sql +++ b/tests/queries/0_stateless/01300_group_by_other_keys.sql @@ -1,3 +1,5 @@ +set max_block_size = 65505; + set optimize_group_by_function_keys = 1; SELECT round(max(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k; diff --git a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql index cd41bb227eb..81bd2ad97a9 100644 --- a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql +++ b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY x; diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index 00c153ec915..9f827ccb13e 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql b/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql index c2191d6ab96..b45b9c84b18 100644 --- a/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql +++ b/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql @@ -1,3 +1,5 @@ +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS t0; CREATE TABLE t0 diff --git a/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql b/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql index 306d94387a4..6b5c2ac8ffd 100644 --- a/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql +++ b/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql @@ -1,5 +1,6 @@ set log_queries = 1; set max_threads = 16; +set prefer_localhost_replica = 1; select sum(number) from remote('127.0.0.{1|2}', numbers_mt(1000000)) group by number % 2 order by number % 2; diff --git a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql index d7c681dc615..bc677698d88 100644 --- a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql +++ b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET prefer_localhost_replica = 1; + DROP DATABASE IF EXISTS test_01457; CREATE DATABASE test_01457; diff 
--git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql index 9c6b392a8aa..932acf48cc4 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.sql +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 -- This file contains tests for the event_time_microseconds field for various tables. -- Note: Only event_time_microseconds for asynchronous_metric_log table is tested via diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index fb26b19ed30..4724bec9eff 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,7 @@ -- Tags: no-s3-storage + +SET use_uncompressed_cache = 0; + SELECT '====array===='; DROP TABLE IF EXISTS t_arr; CREATE TABLE t_arr (a Array(UInt32)) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; diff --git a/tests/queries/0_stateless/01492_format_readable_quantity.reference b/tests/queries/0_stateless/01492_format_readable_quantity.reference index e58a1954eee..247063b70a2 100644 --- a/tests/queries/0_stateless/01492_format_readable_quantity.reference +++ b/tests/queries/0_stateless/01492_format_readable_quantity.reference @@ -20,26 +20,26 @@ 178.48 million 178.48 million 178.48 million 485.17 million 485.17 million 485.17 million 1.32 billion 1.32 billion 1.32 billion -3.58 billion 3.58 billion -2.15 billion -9.74 billion 9.74 billion -2.15 billion -26.49 billion 26.49 billion -2.15 billion -72.00 billion 72.00 billion -2.15 billion -195.73 billion 195.73 billion -2.15 billion -532.05 billion 532.05 billion -2.15 billion -1.45 trillion 1.45 trillion -2.15 billion -3.93 trillion 3.93 trillion -2.15 billion -10.69 trillion 10.69 trillion -2.15 billion -29.05 trillion 29.05 trillion -2.15 billion -78.96 trillion 78.96 trillion -2.15 billion -214.64 trillion 214.64 trillion -2.15 billion -583.46 trillion 583.46 trillion -2.15 billion -1.59 quadrillion 1.59 quadrillion -2.15 billion -4.31 quadrillion 4.31 quadrillion -2.15 billion -11.72 quadrillion 11.72 quadrillion -2.15 billion -31.86 quadrillion 31.86 quadrillion -2.15 billion -86.59 quadrillion 86.59 quadrillion -2.15 billion -235.39 quadrillion 235.39 quadrillion -2.15 billion -639.84 quadrillion 639.84 quadrillion -2.15 billion -1739.27 quadrillion 1739.27 quadrillion -2.15 billion -4727.84 quadrillion 4727.84 quadrillion -2.15 billion -12851.60 quadrillion 12851.60 quadrillion -2.15 billion +3.58 billion 3.58 billion 2.15 billion +9.74 billion 9.74 billion 2.15 billion +26.49 billion 26.49 billion 2.15 billion +72.00 billion 72.00 billion 2.15 billion +195.73 billion 195.73 billion 2.15 billion +532.05 billion 532.05 billion 2.15 billion +1.45 trillion 1.45 trillion 2.15 billion +3.93 trillion 3.93 trillion 2.15 billion +10.69 trillion 10.69 trillion 2.15 billion +29.05 trillion 29.05 trillion 2.15 billion +78.96 trillion 78.96 trillion 2.15 billion +214.64 trillion 214.64 trillion 2.15 billion +583.46 trillion 583.46 trillion 2.15 billion +1.59 quadrillion 1.59 quadrillion 2.15 billion +4.31 quadrillion 4.31 quadrillion 2.15 billion +11.72 quadrillion 11.72 quadrillion 2.15 billion +31.86 quadrillion 31.86 quadrillion 2.15 billion +86.59 quadrillion 86.59 quadrillion 2.15 billion +235.39 quadrillion 235.39 quadrillion 
2.15 billion +639.84 quadrillion 639.84 quadrillion 2.15 billion +1739.27 quadrillion 1739.27 quadrillion 2.15 billion +4727.84 quadrillion 4727.84 quadrillion 2.15 billion +12851.60 quadrillion 12851.60 quadrillion 2.15 billion diff --git a/tests/queries/0_stateless/01492_format_readable_quantity.sql b/tests/queries/0_stateless/01492_format_readable_quantity.sql index 3931cde49df..93aa570ccc8 100644 --- a/tests/queries/0_stateless/01492_format_readable_quantity.sql +++ b/tests/queries/0_stateless/01492_format_readable_quantity.sql @@ -1,4 +1,4 @@ -WITH round(exp(number), 6) AS x, toUInt64(x) AS y, toInt32(x) AS z +WITH round(exp(number), 6) AS x, toUInt64(x) AS y, toInt32(min2(x, 2147483647)) AS z SELECT formatReadableQuantity(x), formatReadableQuantity(y), formatReadableQuantity(z) FROM system.numbers LIMIT 45; diff --git a/tests/queries/0_stateless/01517_select_final_distributed.sql b/tests/queries/0_stateless/01517_select_final_distributed.sql index a3d1fcfc185..701828b0b38 100644 --- a/tests/queries/0_stateless/01517_select_final_distributed.sql +++ b/tests/queries/0_stateless/01517_select_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +SET allow_experimental_parallel_reading_from_replicas = 0; + DROP TABLE IF EXISTS test5346; CREATE TABLE test5346 (`Id` String, `Timestamp` DateTime, `updated` DateTime) diff --git a/tests/queries/0_stateless/01526_max_untracked_memory.sh b/tests/queries/0_stateless/01526_max_untracked_memory.sh index 20c986f14ca..45fdb314fb2 100755 --- a/tests/queries/0_stateless/01526_max_untracked_memory.sh +++ b/tests/queries/0_stateless/01526_max_untracked_memory.sh @@ -1,9 +1,6 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan -# Tag no-tsan: requires TraceCollector, does not available under sanitizers -# Tag no-asan: requires TraceCollector, does not available under sanitizers -# Tag no-ubsan: requires TraceCollector, does not available under sanitizers -# Tag no-msan: requires TraceCollector, does not available under sanitizers +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64 +# requires TraceCollector, which is not available under sanitizers and aarch64 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 40f287b4afd..03724ce0b46 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS nested; SET flatten_nested = 0; +SET use_uncompressed_cache = 0; CREATE TABLE nested ( diff --git a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql index 2b1a66147a4..04777f5b31c 100644 --- a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql +++ b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql @@ -1,5 +1,7 @@ -- Tags: replica +SET allow_experimental_parallel_reading_from_replicas=0; + DROP TABLE IF EXISTS t; CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; INSERT INTO t VALUES ('Hello'); INSERT INTO t VALUES ('Hello'); diff --git a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh index 118d0a4fb96..e54783e9655 100755 --- a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh +++ b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: 
no-tsan, no-asan, no-ubsan, no-msan, no-debug +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql index 788c99da76d..bd3e651e0dc 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS prewhere_move; CREATE TABLE prewhere_move (x Int, y String) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO prewhere_move SELECT number, toString(number) FROM numbers(1000); diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index a73045f5a6f..9f26302e564 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,3 +1,5 @@ +SET use_uncompressed_cache = 0; + DROP TABLE IF EXISTS adaptive_table; --- If granularity of consequent blocks differs a lot, then adaptive diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index de3d3ac3eb6..b66d788a338 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -64,7 +64,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> one condition of filter should be pushed down after aggregating, other two conditions are ANDed" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 @@ -77,7 +77,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 @@ -127,7 +127,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> filter is pushed down before sorting steps" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select x, y from ( select number % 2 as x, number % 3 as y from numbers(6) order by y desc ) where x != 0 and y != 0 diff --git a/tests/queries/0_stateless/01666_blns_long.sql b/tests/queries/0_stateless/01666_blns_long.sql index fd959cf0a73..74054551b18 100644 --- a/tests/queries/0_stateless/01666_blns_long.sql +++ b/tests/queries/0_stateless/01666_blns_long.sql @@ -27,6 +27,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +SET max_insert_threads = 0; + DROP TABLE IF EXISTS test; CREATE TABLE test diff --git a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql index 3f5c5c2f25b..d70665655ca 100644 --- a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql +++ b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql @@ -1,3 +1,5 @@ +SET group_by_two_level_threshold = 10000; + CREATE TABLE group_bitmap_data_test ( `pickup_date` Date, diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql index ecc11c625e3..789892dbd38 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS prewhere_move_select_final; CREATE TABLE prewhere_move_select_final (x Int, y Int, z Int) ENGINE = ReplacingMergeTree() ORDER BY (x, y); diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh index 02943cad583..e10032e04fd 100755 --- a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | zstd -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) SETTINGS max_block_size=65505 FORMAT JSON" | zstd -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index 12b2e368007..f50d15e7121 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -1,5 +1,7 @@ -- Tags: distributed +SET prefer_localhost_replica = 1; + DROP TABLE IF EXISTS tt6; CREATE TABLE tt6 @@ -13,6 +15,8 @@ CREATE TABLE tt6 ) ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); +DROP TABLE IF EXISTS tt7; + CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError 581 } @@ -28,3 +32,4 @@ INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError 306} SELECT * FROM tt6; -- { serverError 306 } DROP TABLE tt6; +DROP TABLE tt7; diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 6be86f9ce02..eb47f065044 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ 
-4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_query_to_cnf=0" + $CLICKHOUSE_CLIENT -q "drop table if exists test_index" $CLICKHOUSE_CLIENT -q "drop table if exists idx" diff --git a/tests/queries/0_stateless/01798_uniq_theta_sketch.sql b/tests/queries/0_stateless/01798_uniq_theta_sketch.sql index bb400c5de14..eace83d5cfa 100644 --- a/tests/queries/0_stateless/01798_uniq_theta_sketch.sql +++ b/tests/queries/0_stateless/01798_uniq_theta_sketch.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_block_size = 65505; + SELECT 'uniqTheta many agrs'; SELECT diff --git a/tests/queries/0_stateless/01822_short_circuit.sql b/tests/queries/0_stateless/01822_short_circuit.sql index 48fff04921b..c7379d210eb 100644 --- a/tests/queries/0_stateless/01822_short_circuit.sql +++ b/tests/queries/0_stateless/01822_short_circuit.sql @@ -1,4 +1,5 @@ set short_circuit_function_evaluation = 'enable'; +set convert_query_to_cnf = 0; select if(number > 0, intDiv(number + 100, number), throwIf(number)) from numbers(10); select multiIf(number == 0, 0, number == 1, intDiv(1, number), number == 2, intDiv(1, number - 1), number == 3, intDiv(1, number - 2), intDiv(1, number - 3)) from numbers(10); diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql index e03972e818d..c4ef5516fc8 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS t_move_to_prewhere; CREATE TABLE t_move_to_prewhere (id UInt32, a UInt8, b UInt8, c UInt8, fat_string String) diff --git a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python index 929eaae8067..4f6878665aa 100644 --- a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python +++ b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python @@ -158,7 +158,7 @@ def test_select(dict_name="", schema="word String, counter UInt32", requests=[], COMPRESS_METHOD = requests[i] print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING) - check_answers("select * from {}".format(dict_name), answers[i]) + check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i]) def main(): # first three for encoding, second three for url @@ -171,7 +171,7 @@ def main(): ] # This answers got experemently in non compressed mode and they are correct - answers = ['''This 152\nHello 1\nis 9283\ndata 555\nWorld 2\ntesting 2313213'''] * 5 + answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5 t = start_server(len(insert_requests)) t.start() diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.sql b/tests/queries/0_stateless/01917_prewhere_column_type.sql index 5147e6093a9..c0bc0c3e36b 100644 --- a/tests/queries/0_stateless/01917_prewhere_column_type.sql +++ b/tests/queries/0_stateless/01917_prewhere_column_type.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( s String, f Float32, e UInt16 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = '100G'; diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference 
b/tests/queries/0_stateless/01921_datatype_date32.reference index 2114f6f6b1e..8beaefbeb38 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -221,13 +221,13 @@ 1925-04-01 1925-04-01 2283-03-31 -1925-01-01 +2283-11-11 2021-09-22 -------addYears--------- 1926-01-01 1926-01-01 2283-11-11 -1925-01-01 +2283-11-11 2022-06-22 -------subtractSeconds--------- 1925-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 7ea102e11e9..9ee7f4a6aff 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS order_by_desc; CREATE TABLE order_by_desc (u UInt32, s String) diff --git a/tests/queries/0_stateless/01943_query_id_check.sql b/tests/queries/0_stateless/01943_query_id_check.sql index cb2ef090854..ad9e88e0478 100644 --- a/tests/queries/0_stateless/01943_query_id_check.sql +++ b/tests/queries/0_stateless/01943_query_id_check.sql @@ -1,6 +1,8 @@ -- Tags: no-replicated-database -- Tag no-replicated-database: Different query_id +SET prefer_localhost_replica = 1; + DROP TABLE IF EXISTS tmp; CREATE TABLE tmp ENGINE = TinyLog AS SELECT queryID(); diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql index fa2fc1800c1..184e6321988 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql @@ -1,5 +1,7 @@ -- Tags: distributed +set prefer_localhost_replica = 1; + -- { echo } explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql index d1f80b42e75..74b55b95315 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql @@ -2,6 +2,7 @@ set optimize_skip_unused_shards=1; set optimize_distributed_group_by_sharding_key=1; +set prefer_localhost_replica=1; -- { echo } explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 54b55c4a9f8..7442ca6bbf6 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -1,3 +1,4 @@ +set group_by_two_level_threshold = 100000; set enable_positional_arguments = 1; drop table if exists test; diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index f9a58818404..086419baa61 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash +# Tags: no-random-settings + set -e CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh + function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" @@ -24,6 +27,14 @@ function insert2() done } +function insert3() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" + done +} + function select1() { while true; do @@ -53,6 +64,7 @@ TIMEOUT=10 export -f insert1 export -f insert2 +export -f insert3 export -f select1 export -f select2 export -f truncate1 @@ -60,6 +72,7 @@ export -f truncate1 for _ in {1..5}; do timeout $TIMEOUT bash -c insert1 & timeout $TIMEOUT bash -c insert2 & + timeout $TIMEOUT bash -c insert3 & done timeout $TIMEOUT bash -c select1 & diff --git a/tests/queries/0_stateless/02030_tuple_filter.sql b/tests/queries/0_stateless/02030_tuple_filter.sql index 5efedeb8c0d..c19f538b8e1 100644 --- a/tests/queries/0_stateless/02030_tuple_filter.sql +++ b/tests/queries/0_stateless/02030_tuple_filter.sql @@ -5,6 +5,7 @@ CREATE TABLE test_tuple_filter (id UInt32, value String, log_date Date) Engine=M INSERT INTO test_tuple_filter VALUES (1,'A','2021-01-01'),(2,'B','2021-01-01'),(3,'C','2021-01-01'),(4,'D','2021-01-02'),(5,'E','2021-01-02'); SET force_primary_key = 1; +SET optimize_move_to_prewhere = 1; SELECT * FROM test_tuple_filter WHERE (id, value) = (1, 'A'); SELECT * FROM test_tuple_filter WHERE (1, 'A') = (id, value); diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index 459e8505e22..f8bcea0d1bb 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # do not print any ProfileEvents packets $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' # print only last (and also number of rows to provide more info in case of failures) -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) +$CLICKHOUSE_CLIENT --max_block_size=65505 --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) # print everything profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" diff --git a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql index 75f7f737e85..d0a55c6ba65 100644 --- a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql +++ b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS 02131_multiply_row_policies_on_same_column; CREATE TABLE 02131_multiply_row_policies_on_same_column (x UInt8) ENGINE = MergeTree ORDER BY x; INSERT INTO 02131_multiply_row_policies_on_same_column VALUES (1), (2), (3), (4); diff --git a/tests/queries/0_stateless/02136_scalar_progress.sh b/tests/queries/0_stateless/02136_scalar_progress.sh index 
4608031f83d..9f4429b0caa 100755 --- a/tests/queries/0_stateless/02136_scalar_progress.sh +++ b/tests/queries/0_stateless/02136_scalar_progress.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000));" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" +$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" diff --git a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh index d589cb60086..34b4b6909b5 100755 --- a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh +++ b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh @@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "#1" ${CLICKHOUSE_CLIENT} --query='SELECT count() FROM numbers(100) FORMAT JSON;' | grep -a -v "elapsed" echo "#2" -${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" +${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) SETTINGS max_block_size = 65505 FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh index 2801ec16a43..44de0e15370 100755 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -# Tags: no-tsan -# ^^^^^^^ +# Tags: no-tsan, no-cpu-aarch64 # TSan does not supports tracing. +# trace_log doesn't work on aarch64 # Regression for proper release of Context, # via tracking memory of external tables. 
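The recurring pattern in these test hunks is to make stateless tests deterministic when the CI harness randomizes settings: each test either pins the query-level settings its reference output depends on (optimize_move_to_prewhere, convert_query_to_cnf, max_block_size, prefer_localhost_replica, and so on) or gains tags such as no-random-settings and no-cpu-aarch64 to skip configurations where the result cannot be stable. The following is a minimal sketch of the pinning approach, not part of the patch; the table name t_example and the chosen queries are illustrative only, and a real test pins just the settings its own reference file assumes.

-- Hypothetical stateless test that pins the settings its reference output relies on,
-- so randomized defaults cannot change the produced plan or row counts.
SET optimize_move_to_prewhere = 1;  -- keep the WHERE -> PREWHERE rewrite enabled
SET convert_query_to_cnf = 0;       -- keep the rewritten query out of CNF form
SET max_block_size = 65505;         -- fixed block size, stable read_rows in progress/JSON output

DROP TABLE IF EXISTS t_example;
CREATE TABLE t_example (id UInt32, s String) ENGINE = MergeTree ORDER BY id;
INSERT INTO t_example SELECT number, toString(number) FROM numbers(1000);

-- The rewritten query shown here is now stable across runs, so a reference file can capture it.
EXPLAIN SYNTAX SELECT s FROM t_example WHERE id = 42;

DROP TABLE t_example;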
diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index 69fa9ac5ee2..b75d3fa22e5 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; DROP TABLE IF EXISTS t_02156_log; diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index baddea30ae3..12cae6af189 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 SELECT addressToLineWithInlines(1); -- { serverError 446 } diff --git a/tests/queries/0_stateless/02169_map_functions.reference b/tests/queries/0_stateless/02169_map_functions.reference new file mode 100644 index 00000000000..160aebbc852 --- /dev/null +++ b/tests/queries/0_stateless/02169_map_functions.reference @@ -0,0 +1,33 @@ +{} +{} +{} +{'key3':103} +{} +{} +{} +{'key3':100,'key2':101,'key4':102} {'key4':102} +{'key3':101,'key2':102,'key4':103} {'key2':102,'key4':103} +{'key3':102,'key2':103,'key4':104} {'key3':102,'key2':103,'key4':104} +{'key3':103,'key2':104,'key4':105} {'key3':103,'key2':104,'key4':105} +{'key1':1111,'key2':2222} {'key2':2222} +{'key1':1112,'key2':2224} {'key1':1112,'key2':2224} +{'key1':1113,'key2':2226} {'key1':1113,'key2':2226} +{'key3':101,'key2':102,'key4':103} +{'key3':102,'key2':103,'key4':104} +{'key3':103,'key2':104,'key4':105} +{'key3':104,'key2':105,'key4':106} +{'key1':1112,'key2':2223} +{'key1':1113,'key2':2225} +{'key1':1114,'key2':2227} +{} +{} +{} +{} +{} +{} +{} +{3:2,1:0,2:0} +{1:2,2:3} +{1:2,2:3} +{'x':'y','x':'y'} +{'x':'y','x':'y'} diff --git a/tests/queries/0_stateless/02169_map_functions.sql b/tests/queries/0_stateless/02169_map_functions.sql new file mode 100644 index 00000000000..4cccaa56722 --- /dev/null +++ b/tests/queries/0_stateless/02169_map_functions.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS table_map; +CREATE TABLE table_map (id UInt32, col Map(String, UInt64)) engine = MergeTree() ORDER BY tuple(); +INSERT INTO table_map SELECT number, map('key1', number, 'key2', number * 2) FROM numbers(1111, 3); +INSERT INTO table_map SELECT number, map('key3', number, 'key2', number + 1, 'key4', number + 2) FROM numbers(100, 4); + +SELECT mapFilter((k, v) -> k like '%3' and v > 102, col) FROM table_map ORDER BY id; +SELECT col, mapFilter((k, v) -> ((v % 10) > 1), col) FROM table_map ORDER BY id ASC; +SELECT mapApply((k, v) -> (k, v + 1), col) FROM table_map ORDER BY id; +SELECT mapFilter((k, v) -> 0, col) from table_map; +SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> (x, x + 1), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1, 0, 2, 0))); +SELECT mapApply((x, y) -> ('x', 'y'), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> ('x', 'y'), materialize(map(1, 0, 2, 0))); + +SELECT mapApply(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply((x, y) -> (x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> ('x'), 
map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply(map(1, 0, 2, 0), (x, y) -> (x, x + 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> (x, x+1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapFilter(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapFilter((x, y) -> (toInt32(x)), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapFilter(map(1, 0, 2, 0), (x, y) -> (x > 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, x + 1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapUpdate(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +DROP TABLE table_map; diff --git a/tests/queries/0_stateless/02207_s3_content_type.reference b/tests/queries/0_stateless/02207_s3_content_type.reference index 2b0a5bcadc2..b015e4a148c 100644 --- a/tests/queries/0_stateless/02207_s3_content_type.reference +++ b/tests/queries/0_stateless/02207_s3_content_type.reference @@ -1,2 +1,2 @@ -ContentLength:2144451 +ContentLength:6888890 ContentType:binary/octet-stream diff --git a/tests/queries/0_stateless/02207_s3_content_type.sh b/tests/queries/0_stateless/02207_s3_content_type.sh index 5ede30e867c..ca75b36c688 100755 --- a/tests/queries/0_stateless/02207_s3_content_type.sh +++ b/tests/queries/0_stateless/02207_s3_content_type.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT --query " -INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/content-type.csv.gz', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/content-type.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; " -aws --endpoint-url http://localhost:11111 s3api head-object --bucket test --key content-type.csv.gz | grep Content | sed 's/[ \t,"]*//g' +aws --endpoint-url http://localhost:11111 s3api head-object --bucket test --key content-type.csv | grep Content | sed 's/[ \t,"]*//g' diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference new file mode 100644 index 00000000000..bd0c9cee464 --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference @@ -0,0 +1,74 @@ +1 +1 +alter_partition_version +alter_partition_version +block_numbers +block_numbers +blocks +blocks +columns +columns +columns +columns +failed_parts +failed_parts +flags +flags +host +host +is_active +is_active +is_lost +is_lost +last_part +last_part +leader_election +leader_election +leader_election-0 +leader_election-0 +log +log +log_pointer +log_pointer +max_processed_insert_time +max_processed_insert_time +metadata +metadata +metadata +metadata +metadata_version +metadata_version +min_unprocessed_insert_time +min_unprocessed_insert_time +mutation_pointer +mutation_pointer +mutations +mutations +nonincrement_block_numbers +nonincrement_block_numbers +parallel +parallel +part_moves_shard +part_moves_shard +parts +parts +pinned_part_uuids +pinned_part_uuids +queue +queue +quorum +quorum +replicas +replicas +shared +shared +shared +shared +table_shared_id +table_shared_id +temp +temp +zero_copy_hdfs +zero_copy_hdfs +zero_copy_s3 +zero_copy_s3 diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh new file mode 100755 index 00000000000..db94c59d2de --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-replicated-database, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" + +${CLICKHOUSE_CLIENT} -n -q" +CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted', '1') +ORDER BY tuple(); +" + +${CLICKHOUSE_CLIENT} -n -q" +CREATE TABLE sample_table_2 ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_2', '1') +ORDER BY tuple(); +" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query "SELECT name FROM (SELECT path, name FROM system.zookeeper ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted%'"; + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference new file mode 100644 index 00000000000..f95d60dc07b --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference @@ -0,0 +1,75 @@ +1 +alter_partition_version +block_numbers +blocks +columns +columns +failed_parts +flags +host +is_active +is_lost +last_part +leader_election +leader_election-0 +log +log_pointer +max_processed_insert_time +metadata +metadata +metadata_version +min_unprocessed_insert_time +mutation_pointer +mutations +nonincrement_block_numbers +parallel +part_moves_shard +parts +pinned_part_uuids +queue +quorum +replicas +shared +shared +table_shared_id +temp +zero_copy_hdfs +zero_copy_s3 +------------------------- +1 +alter_partition_version +block_numbers +blocks +columns +columns +failed_parts +flags +host +is_active +is_lost +last_part +leader_election +leader_election-0 +log +log_pointer +max_processed_insert_time +metadata +metadata +metadata_version +min_unprocessed_insert_time +mutation_pointer +mutations +nonincrement_block_numbers +parallel +part_moves_shard +parts +pinned_part_uuids +queue +quorum +replicas +shared +shared +table_shared_id +temp +zero_copy_hdfs +zero_copy_s3 diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh new file mode 100755 index 00000000000..152d8344764 --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-replicated-database, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" + +${CLICKHOUSE_CLIENT} -n --query="CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like', '1') +ORDER BY tuple(); +DROP TABLE IF EXISTS sample_table;" + + +${CLICKHOUSE_CLIENT} -n --query "CREATE TABLE sample_table_2 ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like_2', '1') +ORDER BY tuple();" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query="SELECT name FROM (SELECT path, name FROM system.zookeeper WHERE path LIKE '/clickhouse%' ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like%'" + +${CLICKHOUSE_CLIENT} --query="SELECT '-------------------------'" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query="SELECT name FROM (SELECT path, name FROM system.zookeeper WHERE path LIKE '/clickhouse/%' ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like%'" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference new file mode 100644 index 00000000000..9e9e0082cb3 --- /dev/null +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.test\n(\n `y` Nullable(String),\n `x` Nullable(Float64)\n)\nENGINE = File(\'JSONEachRow\', \'data.jsonl\') +OK +OK diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh new file mode 100755 index 00000000000..1ba67fa77ea --- /dev/null +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +$CLICKHOUSE_CLIENT -q "insert into table function file(data.jsonl, 'JSONEachRow', 'x UInt32 default 42, y String') select number as x, 'String' as y from numbers(10)" + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test engine=File(JSONEachRow, 'data.jsonl')" +$CLICKHOUSE_CLIENT -q "show create table test" +$CLICKHOUSE_CLIENT -q "detach table test" + +rm $USER_FILES_PATH/data.jsonl + +$CLICKHOUSE_CLIENT -q "attach table test" +$CLICKHOUSE_CLIENT -q "select * from test" 2>&1 | grep -q "FILE_DOESNT_EXIST" && echo "OK" || echo "FAIL" + + +$CLICKHOUSE_CLIENT -q "drop table test" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64) engine=Memory()" + +$CLICKHOUSE_CLIENT -q "drop table if exists test_dist" +$CLICKHOUSE_CLIENT -q "create table test_dist engine=Distributed('test_shard_localhost', currentDatabase(), 'test')" + +$CLICKHOUSE_CLIENT -q "detach table test_dist" +$CLICKHOUSE_CLIENT -q "drop table test" +$CLICKHOUSE_CLIENT -q "attach table test_dist" +$CLICKHOUSE_CLIENT -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" + diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.reference b/tests/queries/0_stateless/02223_insert_select_schema_inference.reference new file mode 100644 index 00000000000..ef1eea12112 --- /dev/null +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.reference @@ -0,0 +1,13 @@ +x UInt32 +y String +d Date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +3 3 1970-01-04 +4 4 1970-01-05 +5 5 1970-01-06 +6 6 1970-01-07 +7 7 1970-01-08 +8 8 1970-01-09 +9 9 1970-01-10 diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql new file mode 100644 index 00000000000..ff39ca83b9b --- /dev/null +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql @@ -0,0 +1,5 @@ +drop table if exists test; +create table test (x UInt32, y String, d Date) engine=Memory() as select number as x, toString(number) as y, toDate(number) as d from numbers(10); +insert into table function file('data.native.zst') select * from test; +desc file('data.native.zst'); +select * from file('data.native.zst'); diff --git a/tests/queries/0_stateless/02226_async_insert_table_function.reference b/tests/queries/0_stateless/02226_async_insert_table_function.reference new file mode 100644 index 00000000000..60d475a7393 --- /dev/null +++ b/tests/queries/0_stateless/02226_async_insert_table_function.reference @@ -0,0 +1,2 @@ +1 aaa +2 bbb diff --git a/tests/queries/0_stateless/02226_async_insert_table_function.sql b/tests/queries/0_stateless/02226_async_insert_table_function.sql new file mode 100644 index 00000000000..fc4aadfbfcd --- /dev/null +++ b/tests/queries/0_stateless/02226_async_insert_table_function.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_async_insert_table_function; + +CREATE TABLE t_async_insert_table_function (id UInt32, s String) ENGINE = Memory; + +SET async_insert = 1; + +INSERT INTO function remote('127.0.0.1', currentDatabase(), t_async_insert_table_function) values (1, 'aaa') (2, 'bbb'); + +SELECT * FROM t_async_insert_table_function ORDER BY id; + +DROP TABLE t_async_insert_table_function; diff --git 
a/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference new file mode 100644 index 00000000000..a1533c4e44a --- /dev/null +++ b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference @@ -0,0 +1,24 @@ +lc_bf_tokenbf +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_fixed_bf_tokenbf +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_ngram +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_fixed_ngram +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_bf_tokenbf +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_fixed_bf_tokenbf +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_ngram +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_fixed_ngram +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 diff --git a/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql new file mode 100644 index 00000000000..d2b30f5e8f4 --- /dev/null +++ b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql @@ -0,0 +1,69 @@ +DROP TABLE IF EXISTS bf_tokenbf_lowcard_test; +DROP TABLE IF EXISTS bf_ngram_lowcard_test; + +CREATE TABLE bf_tokenbf_lowcard_test +( + row_id UInt32, + lc LowCardinality(String), + lc_fixed LowCardinality(FixedString(8)), + INDEX lc_bf_tokenbf lc TYPE tokenbf_v1(256,2,0) GRANULARITY 1, + INDEX lc_fixed_bf_tokenbf lc_fixed TYPE tokenbf_v1(256,2,0) GRANULARITY 1 +) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 1; + +CREATE TABLE bf_ngram_lowcard_test +( + row_id UInt32, + lc LowCardinality(String), + lc_fixed LowCardinality(FixedString(8)), + INDEX lc_ngram lc TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1, + INDEX lc_fixed_ngram lc_fixed TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1 +) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 1; + +INSERT INTO bf_tokenbf_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ'); +INSERT INTO bf_ngram_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ'); +INSERT INTO bf_tokenbf_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef'); +INSERT INTO bf_ngram_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef'); + +SELECT 'lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; + +SELECT 'lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; + +SELECT 'lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_ngram'; + +SELECT 'lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM 
bf_ngram_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; + + +SELECT 'lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; + +SELECT 'lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; + +SELECT 'lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_ngram'; + +SELECT 'lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; + +DROP TABLE bf_tokenbf_lowcard_test; +DROP TABLE bf_ngram_lowcard_test; diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference new file mode 100644 index 00000000000..5fd48ae580a --- /dev/null +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -0,0 +1 @@ +c1 Nullable(String) diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh new file mode 100755 index 00000000000..314a60d6491 --- /dev/null +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "2020-02-01 16:00:00" | $CLICKHOUSE_LOCAL -q "desc table table" --input-format "CSV" --file=- + diff --git a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.reference b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh new file mode 100755 index 00000000000..171dcc52c9c --- /dev/null +++ b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +timeout -s INT 3s $CLICKHOUSE_CLIENT --max_block_size 1 -nm -q " + SELECT sleep(1) FROM numbers(100) FORMAT Null; + SELECT 'FAIL'; +" + +timeout -s INT 3s $CLICKHOUSE_LOCAL --max_block_size 1 -nm -q " + SELECT sleep(1) FROM numbers(100) FORMAT Null; + SELECT 'FAIL'; +" + +exit 0 diff --git a/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference new file mode 100644 index 00000000000..5236875e209 --- /dev/null +++ b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference @@ -0,0 +1,32 @@ +CREATE TABLE default.data_02230_ttl +( + `date` Date, + `key` Int32 +) +ENGINE = MergeTree +ORDER BY key +TTL date + 14 +SETTINGS index_granularity = 8192 +CREATE TABLE default.null_02230_ttl +( + `date` Date, + `key` Int32 +) +ENGINE = Null +CREATE TABLE default.data_02230_column_ttl +( + `date` Date, + `value` Int32 TTL date + 7, + `key` Int32 +) +ENGINE = MergeTree +ORDER BY key +TTL date + 14 +SETTINGS index_granularity = 8192 +CREATE TABLE default.null_02230_column_ttl +( + `date` Date, + `value` Int32, + `key` Int32 +) +ENGINE = Null diff --git a/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql new file mode 100644 index 00000000000..8838f67ec83 --- /dev/null +++ b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql @@ -0,0 +1,18 @@ +drop table if exists data_02230_ttl; +drop table if exists null_02230_ttl; +create table data_02230_ttl (date Date, key Int) Engine=MergeTree() order by key TTL date + 14; +show create data_02230_ttl format TSVRaw; +create table null_02230_ttl engine=Null() as data_02230_ttl; +show create null_02230_ttl format TSVRaw; +drop table data_02230_ttl; +drop table null_02230_ttl; + +drop table if exists data_02230_column_ttl; +drop table if exists null_02230_column_ttl; +create table data_02230_column_ttl (date Date, value Int TTL date + 7, key Int) Engine=MergeTree() order by key TTL date + 14; +show create data_02230_column_ttl format TSVRaw; +create table null_02230_column_ttl engine=Null() as data_02230_column_ttl; +-- check that order of columns is the same +show create null_02230_column_ttl format TSVRaw; +drop table data_02230_column_ttl; +drop table null_02230_column_ttl; diff --git a/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference new file mode 100644 index 00000000000..bccd9864b30 --- /dev/null +++ b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference @@ -0,0 +1,32 @@ +Get hierarchy +[] +[1] +[2,1] +[3,1] +[4,2,1] +[] +Get is in hierarchy +1 +1 +0 +Get children +[1] +[2,3] +[4] +[] +[] +[] +Get all descendants +[1,2,3,4] +[2,3,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2,3] +[4] +[] +[] +[] diff --git a/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql new file mode 100644 index 00000000000..bc01b447338 --- /dev/null +++ b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS hierarchy_source_table; +CREATE TABLE hierarchy_source_table (id UInt64, parent_id UInt64) ENGINE = TinyLog; +INSERT INTO hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2); + +DROP DICTIONARY IF EXISTS hierarchy_flat_dictionary; +CREATE DICTIONARY hierarchy_flat_dictionary 
+( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'hierarchy_source_table')) +LAYOUT(FLAT()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 0); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 1); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 2); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 3); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 4); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 5); + +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierarchy_flat_dictionary', 1, 1); +SELECT dictIsIn('hierarchy_flat_dictionary', 2, 1); +SELECT dictIsIn('hierarchy_flat_dictionary', 2, 0); + +SELECT 'Get children'; +SELECT dictGetChildren('hierarchy_flat_dictionary', 0); +SELECT dictGetChildren('hierarchy_flat_dictionary', 1); +SELECT dictGetChildren('hierarchy_flat_dictionary', 2); +SELECT dictGetChildren('hierarchy_flat_dictionary', 3); +SELECT dictGetChildren('hierarchy_flat_dictionary', 4); +SELECT dictGetChildren('hierarchy_flat_dictionary', 5); + +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierarchy_flat_dictionary', 0); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 2); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 3); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 4); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 5); + +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierarchy_flat_dictionary', 0, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 1, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 2, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 3, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 4, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 5, 1); + +DROP DICTIONARY hierarchy_flat_dictionary; +DROP TABLE hierarchy_source_table; diff --git a/tests/queries/1_stateful/00084_external_aggregation.sql b/tests/queries/1_stateful/00084_external_aggregation.sql index b3922eae049..816d95f4b8b 100644 --- a/tests/queries/1_stateful/00084_external_aggregation.sql +++ b/tests/queries/1_stateful/00084_external_aggregation.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_bytes_before_external_group_by = 200000000; SET max_memory_usage = 1500000000; diff --git a/tests/queries/1_stateful/00154_avro.sql b/tests/queries/1_stateful/00154_avro.sql index ea5d665a3b4..f608da629d2 100644 --- a/tests/queries/1_stateful/00154_avro.sql +++ b/tests/queries/1_stateful/00154_avro.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test.avro; -SET max_threads = 1, max_block_size = 8192, min_insert_block_size_rows = 8192, min_insert_block_size_bytes = 1048576; -- lower memory usage +SET max_threads = 1, max_insert_threads = 0, max_block_size = 8192, min_insert_block_size_rows = 8192, min_insert_block_size_bytes = 1048576; -- lower memory usage CREATE TABLE test.avro AS test.hits ENGINE = File(Avro); INSERT INTO test.avro SELECT * FROM test.hits LIMIT 10000; diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh index 699700bcd3e..276fc0274c2 100755 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan +# Tags: no-tsan, 
no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index 04dfb56ff08..7dd387ba5be 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -61,7 +61,9 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( } case DB::MySQLReplication::TABLE_MAP_EVENT: { - event = std::make_shared(std::move(header)); + DB::MySQLReplication::TableMapEventHeader map_event_header; + map_event_header.parse(*event_payload); + event = std::make_shared(std::move(header), map_event_header); event->parseEvent(*event_payload); last_table_map_event = std::static_pointer_cast(event); break; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index d178778a410..6ebf53cb932 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -74,6 +74,8 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Type ProfileEvents::TypeEnum ProfileEvents::dumpToMapColumn + ProfileEvents::getProfileEvents + ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME ProfileEvents::CountersIncrement diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp index 7d71e13a6e9..dec1f6fe60f 100644 --- a/utils/db-generator/query_db_generator.cpp +++ b/utils/db-generator/query_db_generator.cpp @@ -857,7 +857,7 @@ FuncRet likeFunc(DB::ASTPtr ch, std::map & columns) { std::string value = applyVisitor(DB::FieldVisitorToString(), literal->value); std::string example{}; - for (size_t i = 0; i != value.size(); ++i) + for (size_t i = 0; i != value.size(); ++i) /// NOLINT { if (value[i] == '%') example += randomString(rng() % 10); diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp index dabe0353b0f..4c11f90b3ff 100644 --- a/utils/graphite-rollup/graphite-rollup-bench.cpp +++ b/utils/graphite-rollup/graphite-rollup-bench.cpp @@ -35,7 +35,7 @@ std::vector loadMetrics(const std::string & metrics_file) throw std::runtime_error(strerror(errno)); } - while ((nread = getline(&line, &len, stream)) != -1) + while ((nread = getline(&line, &len, stream)) != -1) /// NOLINT { size_t l = strlen(line); if (l > 0) diff --git a/utils/memcpy-bench/memcpy-bench.cpp b/utils/memcpy-bench/memcpy-bench.cpp index 7f8e89b8355..8b75164eb60 100644 --- a/utils/memcpy-bench/memcpy-bench.cpp +++ b/utils/memcpy-bench/memcpy-bench.cpp @@ -673,7 +673,7 @@ static uint8_t * memcpy_my2(uint8_t * __restrict dst, const uint8_t * __restrict size -= padding; } - while (size >= 512) + while (size >= 512) /// NOLINT { __asm__( "vmovups (%[s]), %%ymm0\n" @@ -794,19 +794,19 @@ static uint8_t * memcpy_my2(uint8_t * __restrict dst, const uint8_t * __restrict return ret; } -extern "C" void * __memcpy_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_sse2_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_ssse3(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_ssse3_back(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" 
void * __memcpy_avx512_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx512_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx512_no_vzeroupper(void * __restrict destination, const void * __restrict source, size_t size); +extern "C" void * __memcpy_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_sse2_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_ssse3(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_ssse3_back(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_no_vzeroupper(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT #define VARIANT(N, NAME) \ - if (memcpy_variant == N) \ + if (memcpy_variant == (N)) \ return test(dst, src, size, iterations, num_threads, std::forward(generator), NAME, #NAME); template diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index c6b1e2be275..06878eb077c 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -85,6 +85,16 @@ Results for ThinkPad P15 are from Mikhail Shiryaev.
Results for RockPi4 are from Kirill Zholnay.
Results for Xeon 6266C are from David in Shanghai.
Results for SSDNodes and Cavium are from Lorenzo QXIP.
+Results for AMD EPYC 7662 64-Core Processor are from Evgeniy Kuts.
+Results for Scaleway GP1-S 8x x86 64bit 32GB ram 300gb NVMe are from Dag Vilmar Tveit.<br/>
+Results for Scaleway GP1-M 16x x86 64bit 64GB ram 600gb NVMe are from Dag Vilmar Tveit.<br/>
+Results for Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz are from Peter, Chun-Sheng, Li.
+Results for MacBook Pro M1 are from Filatenkov Arthur.
+Results for AWS instance type im4gn.4xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type im4gn.8xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type im4gn.16xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type i3.2xlarge are from Ananth Gundabattula (Darwinium).
+Results for 2x EPYC 7702 on ZFS mirror NVMe are from Alibek A.<br/>

diff --git a/website/benchmark/hardware/results/amd_epyc_7662.json b/website/benchmark/hardware/results/amd_epyc_7662.json new file mode 100644 index 00000000000..436c0099992 --- /dev/null +++ b/website/benchmark/hardware/results/amd_epyc_7662.json @@ -0,0 +1,54 @@ +[ + { + "system": "AMD EPYC 7662", + "system_full": "AMD EPYC 7662 64-Core Processor", + "time": "2022-01-26 11:28:55", + "kind": "server", + "result": + [ + [0.001, 0.001, 0.001], + [0.037, 0.019, 0.020], + [0.082, 0.034, 0.026], + [0.298, 0.045, 0.038], + [0.424, 0.188, 0.178], + [0.594, 0.229, 0.227], + [0.037, 0.028, 0.032], + [0.060, 0.028, 0.027], + [0.496, 0.185, 0.192], + [0.611, 0.210, 0.214], + [0.400, 0.148, 0.137], + [0.424, 0.155, 0.144], + [0.639, 0.256, 0.239], + [0.944, 0.404, 0.309], + [0.699, 0.326, 0.288], + [0.461, 0.221, 0.216], + [1.176, 0.539, 0.561], + [1.070, 0.410, 0.426], + [2.080, 0.950, 0.866], + [0.351, 0.066, 0.130], + [3.248, 0.461, 0.313], + [3.612, 0.261, 0.231], + [6.720, 0.682, 0.671], + [6.300, 0.517, 0.488], + [0.982, 0.136, 0.125], + [0.531, 0.112, 0.109], + [1.006, 0.133, 0.118], + [3.184, 0.324, 0.310], + [2.799, 0.327, 0.308], + [0.569, 0.492, 0.493], + [0.900, 0.212, 0.221], + [1.925, 0.353, 0.326], + [2.489, 1.173, 1.248], + [3.626, 0.990, 0.897], + [3.743, 0.935, 0.915], + [0.419, 0.311, 0.339], + [0.278, 0.244, 0.236], + [0.111, 0.099, 0.098], + [0.139, 0.086, 0.084], + [0.664, 0.520, 0.552], + [0.072, 0.028, 0.036], + [0.050, 0.031, 0.022], + [0.005, 0.005, 0.011] + ] + } +] diff --git a/website/benchmark/hardware/results/amd_epyc_7702_zfs.json b/website/benchmark/hardware/results/amd_epyc_7702_zfs.json new file mode 100644 index 00000000000..9e7c15f579f --- /dev/null +++ b/website/benchmark/hardware/results/amd_epyc_7702_zfs.json @@ -0,0 +1,54 @@ +[ + { + "system": "2x EPYC 7702 on ZFS mirror NVME", + "system_full": "2x EPYC 7702 on ZFS mirror NVME, AMD EPYC 7702 64-Core Processor", + "time": "2022-01-14 21:07:13", + "kind": "server", + "result": + [ + [0.001, 0.002, 0.001], + [0.033, 0.021, 0.022], + [0.026, 0.022, 0.024], + [0.032, 0.024, 0.027], + [0.114, 0.115, 0.116], + [0.156, 0.150, 0.156], + [0.035, 0.023, 0.022], + [0.035, 0.023, 0.023], + [0.134, 0.148, 0.133], + [0.165, 0.150, 0.156], + [0.132, 0.087, 0.083], + [0.103, 0.124, 0.094], + [0.273, 0.221, 0.229], + [0.305, 0.263, 0.267], + [0.273, 0.267, 0.239], + [0.210, 0.228, 0.241], + [0.641, 0.518, 0.498], + [0.413, 0.423, 0.485], + [1.044, 0.991, 0.999], + [0.091, 0.144, 0.071], + [0.203, 0.190, 0.203], + [0.199, 0.210, 0.189], + [0.662, 0.753, 0.705], + [0.636, 0.461, 0.445], + [0.093, 0.079, 0.082], + [0.066, 0.070, 0.072], + [0.086, 0.080, 0.091], + [0.293, 0.280, 0.298], + [0.301, 0.258, 0.268], + [0.624, 0.611, 0.613], + [0.170, 0.168, 0.170], + [0.317, 0.269, 0.273], + [1.801, 1.071, 1.183], + [1.049, 1.080, 0.957], + [0.904, 0.892, 0.898], + [0.293, 0.288, 0.291], + [0.176, 0.173, 0.176], + [0.068, 0.068, 0.070], + [0.060, 0.060, 0.061], + [0.412, 0.388, 0.382], + [0.021, 0.019, 0.019], + [0.019, 0.022, 0.015], + [0.004, 0.010, 0.009] + ] + } +] diff --git a/website/benchmark/hardware/results/gp1_s_16x.json b/website/benchmark/hardware/results/gp1_s_16x.json new file mode 100644 index 00000000000..1353fc87d00 --- /dev/null +++ b/website/benchmark/hardware/results/gp1_s_16x.json @@ -0,0 +1,54 @@ +[ + { + "system": "scaleway GP1-S 8x x86", + "system_full": "scaleway GP1-M 16x x86 64bit 64GB ram 600gb NVMe", + "time": "2022-02-16 00:00:00", + "kind": "cloud", + "result": + [ + [0.005, 0.005, 0.036], + [0.039, 
0.026, 0.026], + [0.092, 0.046, 0.046], + [0.172, 0.056, 0.055], + [0.166, 0.126, 0.123], + [0.364, 0.272, 0.265], + [0.005, 0.006, 0.005], + [0.028, 0.027, 0.029], + [0.581, 0.49, 0.486], + [0.69, 0.549, 0.553], + [0.248, 0.178, 0.175], + [0.266, 0.208, 0.208], + [1.584, 1.017, 0.868], + [1.717, 1.113, 1.145], + [1.144, 1.084, 1.048], + [0.991, 0.92, 0.895], + [4.121, 2.639, 2.621], + [1.447, 1.348, 1.354], + [6.802, 6.466, 6.433], + [0.142, 0.057, 0.052], + [1.252, 0.743, 0.715], + [1.389, 0.823, 0.791], + [3.143, 2.225, 2.159], + [1.795, 0.871, 0.837], + [0.361, 0.236, 0.229], + [0.264, 0.211, 0.214], + [0.37, 0.24, 0.225], + [1.449, 0.967, 0.876], + [1.605, 1.206, 1.16 ], + [3.412, 3.388, 3.397], + [0.783, 0.628, 0.65 ], + [1.419, 1.134, 1.112], + [6.983, 6.843, 6.852], + [5.466, 5.082, 4.955], + [5.632, 4.972, 5.22 ], + [1.639, 1.604, 1.571], + [0.285, 0.298, 0.269], + [0.115, 0.115, 0.101], + [0.098, 0.1, 0.092], + [0.563, 0.562, 0.512], + [0.058, 0.039, 0.042], + [0.039, 0.039, 0.025], + [0.029, 0.012, 0.012] + ] + } +] diff --git a/website/benchmark/hardware/results/gp1_s_8x.json b/website/benchmark/hardware/results/gp1_s_8x.json new file mode 100644 index 00000000000..2bc008af54c --- /dev/null +++ b/website/benchmark/hardware/results/gp1_s_8x.json @@ -0,0 +1,54 @@ +[ + { + "system": "scaleway GP1-S 8x x86", + "system_full": "scaleway GP1-S 8x x86 64bit 32GB ram 300gb NVMe", + "time": "2022-02-16 00:00:00", + "kind": "cloud", + "result": + [ + [0.026, 0.004, 0.004], + [0.038, 0.026, 0.026], + [0.071, 0.058, 0.059], + [0.118, 0.072, 0.069], + [0.190, 0.151, 0.155], + [0.465, 0.438, 0.401], + [0.002, 0.004, 0.004], + [0.028, 0.029, 0.026], + [0.751, 0.672, 0.676], + [0.897, 0.845, 0.798], + [0.291, 0.234, 0.254], + [0.371, 0.297, 0.296], + [1.208, 1.041, 1.005], + [1.445, 1.400, 1.414], + [1.406, 1.317, 1.342], + [1.414, 1.242, 1.244], + [4.179, 3.849, 3.878], + [2.320, 2.275, 2.201], + [7.499, 7.424, 7.196], + [0.135, 0.077, 0.068], + [1.465, 1.075, 1.063], + [1.700, 1.221, 1.198], + [3.731, 2.959, 2.905], + [2.283, 1.401, 1.342], + [0.474, 0.377, 0.367], + [0.371, 0.314, 0.337], + [0.483, 0.357, 0.356], + [1.565, 1.194, 1.181], + [2.226, 1.815, 1.746], + [2.990, 2.971, 2.947], + [1.003, 0.815, 0.842], + [1.386, 1.127, 1.108], + [8.174, 7.690, 7.735], + [6.171, 5.802, 5.933], + [6.201, 5.774, 5.972], + [1.758, 1.642, 1.639], + [0.288, 0.273, 0.253], + [0.121, 0.125, 0.107], + [0.096, 0.082, 0.088], + [0.490, 0.461, 0.476], + [0.041, 0.037, 0.035], + [0.035, 0.031, 0.025], + [0.008, 0.011, 0.015] + ] + } +] diff --git a/website/benchmark/hardware/results/i3_2xlarge.json b/website/benchmark/hardware/results/i3_2xlarge.json new file mode 100644 index 00000000000..e716b99e8a2 --- /dev/null +++ b/website/benchmark/hardware/results/i3_2xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS i3.2xlarge", + "system_full": "AWS i3.2xlarge Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", + "time": "2022-01-02 03:16:35", + "kind": "cloud", + "result": + [ + [0.002, 0.002, 0.002], + [0.040, 0.023, 0.027], + [0.153, 0.084, 0.090], + [0.682, 0.113, 0.120], + [1.218, 0.227, 0.225], + [1.972, 0.708, 0.700], + [0.066, 0.052, 0.052], + [0.086, 0.037, 0.030], + [1.609, 1.123, 1.119], + [1.784, 1.231, 1.241], + [0.782, 0.444, 0.392], + [0.929, 0.504, 0.476], + [2.273, 1.649, 1.633], + [4.022, 2.181, 2.214], + [2.459, 2.022, 1.925], + [2.015, 1.621, 1.677], + [6.344, 5.439, 5.625], + [4.450, 3.724, 3.678], + [12.221, 10.922, 10.933], + [0.674, 0.139, 0.132], + [18.758, 2.164, 2.152], + [20.902, 2.440, 2.367], + 
[39.396, 5.476, 5.427], + [31.640, 2.759, 2.755], + [4.498, 0.647, 0.646], + [1.709, 0.627, 0.540], + [4.488, 0.665, 0.656], + [18.286, 2.023, 2.013], + [15.375, 2.896, 2.959], + [2.962, 2.899, 2.974], + [3.663, 1.299, 1.304], + [9.731, 1.922, 1.915], + [11.575, 10.394, 10.514], + [20.617, 8.121, 8.097], + [20.558, 8.088, 8.049], + [3.059, 2.780, 2.678], + [0.322, 0.244, 0.217], + [0.122, 0.082, 0.092], + [0.146, 0.073, 0.072], + [0.652, 0.473, 0.502], + [0.097, 0.025, 0.034], + [0.052, 0.025, 0.019], + [0.007, 0.004, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_16xlarge.json b/website/benchmark/hardware/results/im4gn_16xlarge.json new file mode 100644 index 00000000000..6db4f08021f --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_16xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.16xlarge", + "system_full": "AWS im4gn.16xlarge Neoverse-N1 4x7,500 NVMe SSD", + "time": "2022-01-04 01:04:37", + "kind": "cloud", + "result": + [ + [0.002, 0.001, 0.001], + [0.046, 0.017, 0.021], + [0.044, 0.021, 0.022], + [0.850, 0.064, 0.066], + [1.423, 0.076, 0.075], + [2.368, 0.141, 0.139], + [0.022, 0.013, 0.013], + [0.037, 0.038, 0.036], + [1.434, 0.138, 0.138], + [2.173, 0.159, 0.158], + [1.253, 0.089, 0.091], + [1.481, 0.102, 0.093], + [2.377, 0.211, 0.206], + [3.850, 0.272, 0.253], + [2.180, 0.276, 0.239], + [1.030, 0.242, 0.228], + [3.966, 0.564, 0.526], + [3.549, 0.404, 0.377], + [6.940, 1.389, 1.267], + [0.741, 0.225, 0.126], + [19.135, 0.398, 0.371], + [21.322, 0.330, 0.322], + [40.018, 0.727, 0.697], + [33.059, 1.592, 1.565], + [4.599, 0.098, 0.092], + [2.270, 0.089, 0.088], + [5.238, 0.098, 0.095], + [19.201, 0.358, 0.349], + [15.661, 0.430, 0.412], + [0.896, 0.876, 0.863], + [3.579, 0.223, 0.200], + [9.826, 0.344, 0.314], + [7.844, 2.085, 2.183], + [19.018, 1.143, 1.036], + [19.009, 1.203, 1.046], + [0.531, 0.325, 0.331], + [0.262, 0.221, 0.218], + [0.137, 0.101, 0.090], + [0.116, 0.099, 0.079], + [0.531, 0.468, 0.468], + [0.070, 0.025, 0.043], + [0.034, 0.020, 0.020], + [0.007, 0.004, 0.018] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_4xlarge.json b/website/benchmark/hardware/results/im4gn_4xlarge.json new file mode 100644 index 00000000000..c3024c8dff2 --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_4xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.4xlarge", + "system_full": "AWS im4gn.4xlarge Neoverse-N1 1x7,500 NVMe SSD", + "time": "2022-01-02 06:59:48", + "kind": "cloud", + "result": + [ + [0.002, 0.002, 0.002], + [0.023, 0.013, 0.013], + [0.061, 0.026, 0.025], + [0.841, 0.033, 0.032], + [1.530, 0.086, 0.084], + [2.362, 0.291, 0.292], + [0.038, 0.029, 0.028], + [0.016, 0.015, 0.014], + [1.341, 0.302, 0.301], + [1.845, 0.376, 0.360], + [0.888, 0.184, 0.181], + [1.343, 0.215, 0.210], + [2.185, 0.469, 0.459], + [3.662, 0.603, 0.580], + [2.150, 0.587, 0.561], + [0.875, 0.458, 0.449], + [4.079, 1.425, 1.343], + [3.451, 0.927, 0.859], + [7.646, 2.890, 2.877], + [0.710, 0.107, 0.042], + [19.321, 0.696, 0.677], + [21.321, 0.740, 0.726], + [40.051, 1.625, 1.598], + [32.154, 0.842, 0.819], + [4.681, 0.240, 0.221], + [1.976, 0.197, 0.195], + [5.062, 0.241, 0.223], + [18.972, 0.643, 0.628], + [15.676, 0.978, 0.957], + [0.524, 0.505, 0.518], + [3.589, 0.460, 0.461], + [9.647, 0.674, 0.642], + [8.330, 3.414, 3.354], + [19.314, 2.296, 2.286], + [19.278, 2.311, 2.273], + [0.799, 0.753, 0.717], + [0.288, 0.222, 0.222], + [0.118, 0.101, 0.099], + [0.126, 0.085, 0.084], + [0.542, 0.480, 0.446], + [0.065, 0.025, 0.031], 
+ [0.046, 0.021, 0.020], + [0.006, 0.010, 0.017] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_8xlarge.json b/website/benchmark/hardware/results/im4gn_8xlarge.json new file mode 100644 index 00000000000..117812b0162 --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_8xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.8xlarge", + "system_full": "AWS im4gn.8xlarge Neoverse-N1 2x7,500 NVMe SSD", + "time": "2022-01-03 22:23:27", + "kind": "cloud", + "result": + [ + [0.002, 0.001, 0.001], + [0.034, 0.010, 0.010], + [0.044, 0.016, 0.016], + [0.862, 0.020, 0.020], + [1.500, 0.069, 0.071], + [2.454, 0.174, 0.172], + [0.025, 0.017, 0.017], + [0.023, 0.023, 0.023], + [1.329, 0.182, 0.181], + [2.167, 0.216, 0.212], + [1.159, 0.125, 0.119], + [1.483, 0.127, 0.122], + [2.313, 0.268, 0.260], + [3.788, 0.361, 0.329], + [2.043, 0.343, 0.308], + [0.872, 0.321, 0.309], + [3.921, 0.879, 0.840], + [3.460, 0.587, 0.543], + [7.272, 1.517, 1.447], + [0.707, 0.078, 0.064], + [19.314, 0.425, 0.385], + [21.332, 0.414, 0.405], + [40.030, 0.945, 0.921], + [32.867, 0.513, 0.477], + [4.640, 0.130, 0.124], + [2.227, 0.115, 0.107], + [5.223, 0.134, 0.126], + [19.179, 0.371, 0.367], + [15.658, 0.557, 0.545], + [0.541, 0.558, 0.552], + [3.548, 0.273, 0.250], + [9.772, 0.384, 0.357], + [7.896, 2.431, 2.661], + [19.149, 1.389, 1.268], + [19.103, 1.342, 1.282], + [0.583, 0.530, 0.541], + [0.238, 0.233, 0.243], + [0.114, 0.098, 0.102], + [0.124, 0.092, 0.089], + [0.552, 0.471, 0.481], + [0.053, 0.025, 0.025], + [0.047, 0.057, 0.020], + [0.022, 0.032, 0.004] + ] + } +] diff --git a/website/benchmark/hardware/results/intel_core_i5_4440.json b/website/benchmark/hardware/results/intel_core_i5_4440.json new file mode 100644 index 00000000000..b70b9e08fd4 --- /dev/null +++ b/website/benchmark/hardware/results/intel_core_i5_4440.json @@ -0,0 +1,54 @@ +[ + { + "system": "Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz", + "system_full": "Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz", + "time": "2022-01-06 08:48:45", + "kind": "server", + "result": + [ + [0.002, 0.001, 0.001], + [0.136, 0.021, 0.020], + [1.102, 0.061, 0.055], + [2.669, 0.089, 0.084], + [2.646, 0.198, 0.192], + [4.018, 0.606, 0.600], + [0.115, 0.034, 0.044], + [0.210, 0.018, 0.018], + [4.655, 1.002, 1.004], + [6.715, 1.139, 1.150], + [3.235, 0.351, 0.352], + [3.850, 0.410, 0.408], + [4.446, 1.579, 1.570], + [7.112, 2.031, 2.061], + [5.658, 1.812, 1.804], + [3.528, 1.600, 1.599], + [9.216, 5.029, 5.031], + [7.023, 2.968, 3.362], + [17.412, 9.705, 9.695], + [2.717, 0.110, 0.100], + [28.586, 1.907, 1.870], + [34.064, 2.178, 2.172], + [67.172, 5.105, 5.101], + [79.885, 2.579, 2.540], + [9.176, 0.572, 0.560], + [4.050, 0.496, 0.492], + [8.918, 0.575, 0.568], + [28.731, 2.089, 2.058], + [24.174, 2.956, 3.043], + [5.103, 5.010, 5.007], + [10.075, 1.188, 1.197], + [18.485, 1.966, 1.954], + [19.455, 10.855, 10.917], + [31.320, 7.848, 7.831], + [30.794, 7.871, 7.877], + [3.360, 2.777, 2.778], + [0.371, 0.166, 0.180], + [0.259, 0.064, 0.083], + [0.275, 0.060, 0.058], + [1.024, 0.380, 0.378], + [0.198, 0.025, 0.025], + [0.162, 0.023, 0.015], + [0.059, 0.006, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/macbook_pro_m1_2021.json b/website/benchmark/hardware/results/macbook_pro_m1_2021.json new file mode 100644 index 00000000000..516940e1ef2 --- /dev/null +++ b/website/benchmark/hardware/results/macbook_pro_m1_2021.json @@ -0,0 +1,54 @@ +[ + { + "system": "MacBook Pro M1", + "system_full": "MacBook Pro M1 Max 16\" 2022, 64 GiB RAM, 1 TB 
SSD", + "time": "2022-02-27 00:00:00", + "kind": "laptop", + "result": + [ + [0.012, 0.001, 0.001], + [0.096, 0.012, 0.010], + [0.043, 0.022, 0.023], + [0.063, 0.031, 0.030], + [0.099, 0.070, 0.070], + [0.229, 0.197, 0.195], + [0.012, 0.001, 0.001], + [0.027, 0.012, 0.011], + [0.340, 0.301, 0.306], + [0.439, 0.383, 0.386], + [0.169, 0.134, 0.136], + [0.197, 0.160, 0.162], + [0.475, 0.435, 0.432], + [0.615, 0.557, 0.553], + [0.553, 0.502, 0.507], + [0.490, 0.445, 0.439], + [1.392, 1.260, 1.254], + [0.865, 0.833, 0.835], + [2.285, 2.180, 2.194], + [0.064, 0.035, 0.033], + [0.761, 0.650, 0.651], + [0.867, 0.715, 0.718], + [1.753, 1.478, 1.499], + [1.037, 0.737, 0.735], + [0.251, 0.201, 0.202], + [0.208, 0.172, 0.174], + [0.254, 0.202, 0.201], + [0.733, 0.598, 0.603], + [0.995, 0.882, 0.879], + [0.562, 0.545, 0.545], + [0.431, 0.371, 0.371], + [0.586, 0.490, 0.490], + [2.882, 2.664, 2.656], + [2.255, 2.147, 2.146], + [2.248, 2.137, 2.154], + [0.659, 0.638, 0.631], + [0.125, 0.108, 0.108], + [0.070, 0.052, 0.052], + [0.060, 0.042, 0.042], + [0.250, 0.229, 0.228], + [0.030, 0.013, 0.012], + [0.026, 0.011, 0.010], + [0.017, 0.003, 0.003] + ] + } +] diff --git a/website/sitemap-static.xml b/website/sitemap-static.xml index b5b5f3aa0d5..88888e31b3b 100644 --- a/website/sitemap-static.xml +++ b/website/sitemap-static.xml @@ -17,7 +17,7 @@ weekly - https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html + https://clickhouse.com/codebrowser/ClickHouse/index.html daily