Merge branch 'master' into ldap-per-user-authentication

This commit is contained in:
Denis Glazachev 2020-07-09 14:50:02 +04:00 committed by GitHub
commit 5a4762bb8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
293 changed files with 7871 additions and 10625 deletions

View File

@ -17,4 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse for genetic data (in Russian)](https://cloud.yandex.ru/events/152) on July 14, 2020.
* [ClickHouse virtual office hours](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/271522978/) on July 15, 2020.
* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-july-virtual-meetup-tickets-111199787558) on July 15, 2020.

View File

@ -1,4 +1,4 @@
SET(ENABLE_AMQPCPP 1)
SET(ENABLE_AMQPCPP ${ENABLE_LIBRARIES})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt")
message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive")
set (ENABLE_AMQPCPP 0)

View File

@ -44,8 +44,13 @@ endif ()
if (USE_INTERNAL_RE2_LIBRARY)
set(RE2_BUILD_TESTING 0 CACHE INTERNAL "")
add_subdirectory (re2)
add_subdirectory (re2_st)
# Add the bundled re2 and re2_st subdirectories from inside a function.
# function() opens a new variable scope, so the policy default set below is
# local to this call and is not visible to the rest of this file.
function(re2_support)
# make option() honor normal variables for BUILD_SHARED_LIBS
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
add_subdirectory (re2)
add_subdirectory (re2_st)
endfunction()
re2_support()
endif ()
if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
@ -222,10 +227,19 @@ if (USE_INTERNAL_AVRO_LIBRARY)
endif()
if(USE_INTERNAL_GTEST_LIBRARY)
# Google Test from sources
add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# avoid problems with <regexp.h>
target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
# Wrap into function because of CMAKE_POLICY_DEFAULT_CMP0022: variables set
# inside a function are local to that function's scope.
function(googletest_support)
set(GOOGLETEST_VERSION 1.10.0) # master
# Google Test built from sources declares a too old CMake version (2.6.x), so
# CMP0022 should be set to NEW to avoid using the deprecated
# LINK_INTERFACE_LIBRARIES(_<CONFIG>)? over INTERFACE_LINK_LIBRARIES.
set(CMAKE_POLICY_DEFAULT_CMP0022 NEW)
# Google Test from sources
add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
# avoid problems with <regexp.h>
target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0)
endfunction()
googletest_support()
elseif(GTEST_SRC_DIR)
add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest)
target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0)
@ -290,12 +304,13 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
if (USE_AMQPCPP OR USE_CASSANDRA)
add_subdirectory (libuv)
endif()
if (USE_AMQPCPP)
add_subdirectory (amqpcpp-cmake)
endif()
if (USE_CASSANDRA)
add_subdirectory (libuv)
add_subdirectory (cassandra)
endif()

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit b789226ccb2124285792107c758bb3b40b3d082a
Subproject commit 3cbcb7b62c2f2d02851bff837758637eb592a64b

View File

@ -1,5 +1,3 @@
include(ExternalProject)
set (CMAKE_CXX_STANDARD 17)
# === thrift
@ -77,14 +75,9 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
# === flatbuffers
##############################################################
# fbs - Step 1: build flatbuffers lib and flatc compiler
##############################################################
set(FLATBUFFERS_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_INCLUDE_DIR ${FLATBUFFERS_SRC_DIR}/include)
set(FLATBUFFERS_COMPILER "$<TARGET_FILE:flatc>")
# set flatbuffers CMake options
if (${USE_STATIC_LIBRARIES})
@ -94,57 +87,11 @@ else ()
set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library")
set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library")
endif ()
set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "Build flatbuffers compiler")
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
###################################
# fbs - Step 2: compile *.fbs files
###################################
set(ARROW_IPC_SRC_DIR ${ARROW_SRC_DIR}/arrow/ipc)
set(ARROW_FORMAT_SRC_DIR ${ARROW_SRC_DIR}/../../format)
set(ARROW_GENERATED_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/arrow_gen_headers)
set(FLATBUFFERS_COMPILED_OUT_DIR ${ARROW_GENERATED_INCLUDE_DIR}/arrow/ipc)
set(FBS_OUTPUT_FILES
"${FLATBUFFERS_COMPILED_OUT_DIR}/File_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Message_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/feather_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Schema_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/SparseTensor_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Tensor_generated.h")
set(FBS_SRC
${ARROW_FORMAT_SRC_DIR}/Message.fbs
${ARROW_FORMAT_SRC_DIR}/File.fbs
${ARROW_FORMAT_SRC_DIR}/Schema.fbs
${ARROW_FORMAT_SRC_DIR}/Tensor.fbs
${ARROW_FORMAT_SRC_DIR}/SparseTensor.fbs
${ARROW_IPC_SRC_DIR}/feather.fbs)
foreach (FIL ${FBS_SRC})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
list(APPEND ABS_FBS_SRC ${ABS_FIL})
endforeach ()
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}, FLATBUFFERS_COMPILER: ${FLATBUFFERS_COMPILER}")
message(STATUS "FLATBUFFERS_COMPILED_OUT_DIR: ${FLATBUFFERS_COMPILED_OUT_DIR}")
message(STATUS "flatc: ${FLATBUFFERS_COMPILER} -c -o ${FLATBUFFERS_COMPILED_OUT_DIR}/ ${ABS_FBS_SRC}")
add_custom_command(OUTPUT ${FBS_OUTPUT_FILES}
COMMAND ${FLATBUFFERS_COMPILER}
-c
-o
${FLATBUFFERS_COMPILED_OUT_DIR}/
${ABS_FBS_SRC}
DEPENDS flatc ${ABS_FBS_SRC}
COMMENT "Running flatc compiler on ${ABS_FBS_SRC}"
VERBATIM)
add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
add_dependencies(metadata_fbs flatc)
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")
# arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features.
# Apple Clang compiler failed to compile this code without specifying c++11 standard.
@ -203,6 +150,7 @@ configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}
set(ARROW_SRCS
${LIBRARY_DIR}/array.cc
${LIBRARY_DIR}/buffer.cc
${LIBRARY_DIR}/device.cc
${LIBRARY_DIR}/builder.cc
${LIBRARY_DIR}/compare.cc
${LIBRARY_DIR}/extension_type.cc
@ -219,6 +167,11 @@ set(ARROW_SRCS
${LIBRARY_DIR}/type.cc
${LIBRARY_DIR}/visitor.cc
${LIBRARY_DIR}/tensor/coo_converter.cc
${LIBRARY_DIR}/tensor/csc_converter.cc
${LIBRARY_DIR}/tensor/csf_converter.cc
${LIBRARY_DIR}/tensor/csr_converter.cc
${LIBRARY_DIR}/array/builder_adaptive.cc
${LIBRARY_DIR}/array/builder_base.cc
${LIBRARY_DIR}/array/builder_binary.cc
@ -230,6 +183,7 @@ set(ARROW_SRCS
${LIBRARY_DIR}/array/concatenate.cc
${LIBRARY_DIR}/array/dict_internal.cc
${LIBRARY_DIR}/array/diff.cc
${LIBRARY_DIR}/array/validate.cc
${LIBRARY_DIR}/csv/converter.cc
${LIBRARY_DIR}/csv/chunker.cc
@ -237,6 +191,7 @@ set(ARROW_SRCS
${LIBRARY_DIR}/csv/options.cc
${LIBRARY_DIR}/csv/parser.cc
${LIBRARY_DIR}/csv/reader.cc
${LIBRARY_DIR}/csv/column_decoder.cc
${LIBRARY_DIR}/ipc/dictionary.cc
${LIBRARY_DIR}/ipc/feather.cc
@ -251,7 +206,6 @@ set(ARROW_SRCS
${LIBRARY_DIR}/io/file.cc
${LIBRARY_DIR}/io/interfaces.cc
${LIBRARY_DIR}/io/memory.cc
${LIBRARY_DIR}/io/readahead.cc
${LIBRARY_DIR}/io/slow.cc
${LIBRARY_DIR}/util/basic_decimal.cc
@ -274,6 +228,12 @@ set(ARROW_SRCS
${LIBRARY_DIR}/util/thread_pool.cc
${LIBRARY_DIR}/util/trie.cc
${LIBRARY_DIR}/util/utf8.cc
${LIBRARY_DIR}/util/future.cc
${LIBRARY_DIR}/util/formatting.cc
${LIBRARY_DIR}/util/parsing.cc
${LIBRARY_DIR}/util/time.cc
${LIBRARY_DIR}/util/delimiting.cc
${LIBRARY_DIR}/util/iterator.cc
${LIBRARY_DIR}/vendored/base64.cpp
${ORC_SRCS}
@ -321,7 +281,7 @@ endif ()
add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
# Arrow dependencies
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
@ -352,17 +312,18 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_D
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_GENERATED_INCLUDE_DIR})
# === parquet
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)
set(GEN_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated)
# arrow/cpp/src/parquet/CMakeLists.txt
set(PARQUET_SRCS
${LIBRARY_DIR}/arrow/reader.cc
${LIBRARY_DIR}/arrow/reader_internal.cc
${LIBRARY_DIR}/arrow/schema.cc
${LIBRARY_DIR}/arrow/writer.cc
${LIBRARY_DIR}/arrow/path_internal.cc
${LIBRARY_DIR}/bloom_filter.cc
${LIBRARY_DIR}/column_reader.cc
${LIBRARY_DIR}/column_scanner.cc
@ -379,16 +340,19 @@ set(PARQUET_SRCS
${LIBRARY_DIR}/schema.cc
${LIBRARY_DIR}/statistics.cc
${LIBRARY_DIR}/types.cc
${LIBRARY_DIR}/encryption.cc
${LIBRARY_DIR}/encryption_internal.cc
${LIBRARY_DIR}/internal_file_decryptor.cc
${LIBRARY_DIR}/internal_file_encryptor.cc
${GEN_LIBRARY_DIR}/parquet_constants.cpp
${GEN_LIBRARY_DIR}/parquet_types.cpp
)
#list(TRANSFORM PARQUET_SRCS PREPEND ${LIBRARY_DIR}/) # cmake 3.12
list(APPEND PARQUET_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp
)
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src PRIVATE ${OPENSSL_INCLUDE_DIR})
include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex)
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex ${OPENSSL_LIBRARIES})
if (SANITIZE STREQUAL "undefined")
target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)

View File

@ -1,17 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#include "parquet_constants.h"
namespace parquet { namespace format {
const parquetConstants g_parquet_constants;
parquetConstants::parquetConstants() {
}
}} // namespace

View File

@ -1,24 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#ifndef parquet_CONSTANTS_H
#define parquet_CONSTANTS_H
#include "parquet_types.h"
namespace parquet { namespace format {
class parquetConstants {
public:
parquetConstants();
};
extern const parquetConstants g_parquet_constants;
}} // namespace
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,13 @@
#ifndef PARQUET_VERSION_H
#define PARQUET_VERSION_H
#define PARQUET_VERSION_MAJOR 1
#define PARQUET_VERSION_MINOR 5
#define PARQUET_VERSION_PATCH 1
#define PARQUET_SO_VERSION 0
#define PARQUET_FULL_SO_VERSION 0.17
// define the parquet created by version
#define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT"

2
contrib/flatbuffers vendored

@ -1 +1 @@
Subproject commit bf9eb67ab9371755c6bcece13cadc7693bcbf264
Subproject commit 6df40a2471737b27271bdd9b900ab5f3aec746c7

2
contrib/googletest vendored

@ -1 +1 @@
Subproject commit 703bd9caab50b139428cea1aaff9974ebee5742e
Subproject commit 356f2d264a485db2fcc50ec1c672e0d37b6cb39b

View File

@ -18,7 +18,7 @@ ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS ..
ninja clickhouse-bundle
ninja $NINJA_FLAGS clickhouse-bundle
mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output
find . -name '*.so' -print -exec mv '{}' /output \;

View File

@ -149,6 +149,8 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if clang_tidy:
cmake_flags.append('-DENABLE_CLANG_TIDY=1')
# Don't stop on first error to find more clang-tidy errors in one run.
result.append('NINJA_FLAGS=-k0')
if with_coverage:
cmake_flags.append('-DWITH_COVERAGE=1')

View File

@ -18,8 +18,58 @@ to have false positives.
### How to read the report
Should add inline comments there, because who reads the docs anyway. They must
be collapsible and I am afraid of Javascript, so I'm going to do it later.
The check status summarizes the report in a short text message like `1 faster, 10 unstable`:
* `1 faster` -- how many queries became faster,
* `1 slower` -- how many queries are slower,
* `1 too long` -- how many queries are taking too long to run,
* `1 unstable` -- how many queries have unstable results,
* `1 errors` -- how many errors there are in total. Action is required for every error, this number must be zero. The number of errors includes slower tests, tests that are too long, errors while running the tests and building reports, etc. Please look at the main report page to investigate these errors.
The report page itself consists of several tables. Some of them always signify errors, e.g. "Run errors" -- the very presence of this table indicates that there were errors during the test, that are not normal and must be fixed. Some tables are mostly informational, e.g. "Test times" -- they reflect normal test results. But if a cell in such a table is marked in red, this also means an error, e.g., a test is taking too long to run.
#### Tested commits
Informational, no action required. Log messages for the commits that are tested. Note that for the right commit, we show nominal tested commit `pull/*/head` and real tested commit `pull/*/merge`, which is generated by GitHub by merging latest master to the `pull/*/head` and which we actually build and test in CI.
#### Run errors
Action required for every item -- these are errors that must be fixed. The errors that occurred when running some test queries. For more information about the error, download test output archive and see `test-name-err.log`. To reproduce, see 'How to run' below.
#### Slow on client
Action required for every item -- these are errors that must be fixed. This table shows queries that take significantly longer to process on the client than on the server. A possible reason might be sending too much data to the client, e.g., a forgotten `format Null`.
#### Partial queries
Action required for the cells marked in red. Shows the queries we are unable to run on an old server -- probably because they contain a new function. You should see this table when you add a new function and a performance test for it. Check that the run time and variance are acceptable (run time between 0.1 and 1 seconds, variance below 10%). If not, they will be highlighted in red.
#### Changes in performance
Action required for the cells marked in red, and some cheering is appropriate for the cells marked in green. These are the queries for which we observe a statistically significant change in performance. Note that there will always be some false positives -- we try to filter by p < 0.001, and have 2000 queries, so two false positives per run are expected. In practice we have more -- e.g. code layout changed because of some unknowable jitter in compiler internals, so the change we observe is real, but it is a 'false positive' in the sense that it is not directly caused by your changes. If, based on your knowledge of ClickHouse internals, you can decide that the observed test changes are not relevant to the changes made in the tested PR, you can ignore them.
You can find flame graphs for queries with performance changes in the test output archive, in files named as 'my_test_0_Cpu_SELECT 1 FROM....FORMAT Null.left.svg'. First goes the test name, then the query number in the test, then the trace type (same as in `system.trace_log`), and then the server version (left is old and right is new).
#### Unstable queries
Action required for the cells marked in red. These are queries for which we did not observe a statistically significant change in performance, but for which the variance in query performance is very high. This means that we are likely to observe big changes in performance even in the absence of real changes, e.g. when comparing the server to itself. Such queries are going to have bad sensitivity as performance tests -- if a query has, say, 50% expected variability, this means we are going to see changes in performance up to 50%, even when there were no real changes in the code. And because of this, we won't be able to detect changes less than 50% with such a query, which is pretty bad. The reasons for the high variability must be investigated and fixed; ideally, the variability should be brought under 5-10%.
The most frequent reason for instability is that the query is just too short -- e.g. below 0.1 seconds. Bringing query time to 0.2 seconds or above usually helps.
Other reasons may include:
* using a lot of memory which is allocated differently between servers, so the access time may vary. This may apply to your queries if you have a `Memory` engine table that is bigger than 1 GB. For example, this problem has plagued `arithmetic` and `logical_functions` tests for a long time.
* having some threshold behavior in the query, e.g. you insert to a Buffer table and it is flushed only on some query runs, so you get a much higher time for them.
Investigating the instability is the hardest problem in performance testing, and we still have not been able to understand the reasons behind the instability of some queries. There are some data that can help you in the performance test output archive. Look for files named 'my_unstable_test_0_SELECT 1...FORMAT Null.{left,right}.metrics.rep'. They contain metrics from `system.query_log.ProfileEvents` and functions from stack traces from `system.trace_log`, that vary significantly between query runs. The second column is an array of \[min, med, max] values for the metric. Say, if you see `PerfCacheMisses` there, it may mean that the code being tested has not-so-cache-local memory access pattern that is sensitive to memory layout.
#### Skipped tests
Informational, no action required. Shows the tests that were skipped, and the reason for it. Normally it is because the data set required for the test was not loaded, or the test is marked as 'long' -- both cases mean that the test is too big to be run per-commit.
#### Test performance changes
Informational, no action required. This table summarizes the changes in performance of queries in each test -- how many queries have changed, how many are unstable, and what is the magnitude of the changes.
#### Test times
Action required for the cells marked in red. This table shows the run times for all the tests. You may have to fix two kinds of errors in this table:
1) Average query run time is too long -- probably means that the preparatory steps such as creating the tables and filling them with data are taking too long. Try to make them faster.
2) Longest query run time is too long -- some particular queries are taking too long, try to make them faster. The ideal query run time is between 0.1 and 1 s.
#### Concurrent benchmarks
No action required. This table shows the results of a concurrent benchmark where queries from `website` are run in parallel using `clickhouse-benchmark`, and requests per second values are compared for old and new servers. It shows variability up to 20% for no apparent reason, so it's probably safe to disregard it. We have it for special cases like investigating concurrency effects in memory allocators, where it may be important.
#### Metric changes
No action required. These are changes in median values of metrics from `system.asynchronous_metrics_log`. Again, they are prone to unexplained variation and you can safely ignore this table unless it's interesting to you for some particular reason (e.g. you want to compare memory usage). There are also graphs of these metrics in the performance test output archive, in the `metrics` folder.
### How to run
Run the entire docker container, specifying PR number (0 for master)

View File

@ -11,46 +11,5 @@ COPY s3downloader /s3downloader
ENV DATASETS="hits visits"
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
mkdir -p /etc/clickhouse-server/dict_examples; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/; fi; \
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/; fi; \
echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-8/bin/llvm-symbolizer" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \
service zookeeper start; sleep 5; \
service clickhouse-server start && sleep 5 \
&& /s3downloader --dataset-names $DATASETS \
&& chmod 777 -R /var/lib/clickhouse \
&& clickhouse-client --query "SHOW DATABASES" \
&& clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" \
&& clickhouse-client --query "CREATE DATABASE test" \
&& service clickhouse-server restart && sleep 5 \
&& clickhouse-client --query "SHOW TABLES FROM datasets" \
&& clickhouse-client --query "SHOW TABLES FROM test" \
&& clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" \
&& clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" \
&& clickhouse-client --query "SHOW TABLES FROM test" \
&& clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

59
docker/test/stateful/run.sh Executable file
View File

@ -0,0 +1,59 @@
#!/bin/bash
# Entry point of the stateful test image: installs the ClickHouse packages,
# links the test configuration into /etc/clickhouse-server, starts ZooKeeper
# and the server, downloads the datasets and runs the stateful tests.
#
# Environment variables read by this script:
#   DATASETS             - dataset names passed to /s3downloader
#   USE_DATABASE_ATOMIC  - when set to 1, links the database_atomic_* configs
#   ADDITIONAL_OPTIONS   - extra options for clickhouse-test (word-split)
#   SKIP_TESTS_OPTION    - extra options for clickhouse-test (word-split)

# Abort on the first failing command and echo every command to the log.
set -e -x

# Install the packages under test.
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb

# Link the configuration shipped with clickhouse-test into the server config.
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/

# Optional configuration, enabled only when USE_DATABASE_ATOMIC is 1.
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
    ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/
    ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi

# Sanitizer options and symbolizer locations, appended to /etc/environment.
echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment

# Start the services; the sleeps give each of them time to come up.
service zookeeper start
sleep 5
service clickhouse-server start
sleep 5

# DATASETS is intentionally unquoted: it holds a space-separated list of names.
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse

# Attach the downloaded data and move the tables into the `test` database.
clickhouse-client --query "SHOW DATABASES"
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE test"
service clickhouse-server restart && sleep 5
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
clickhouse-client --query "SHOW TABLES FROM test"

# Pass --use-skip-list only when the installed clickhouse-test mentions it.
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
    SKIP_LIST_OPT="--use-skip-list"
fi

# ${SKIP_LIST_OPT:+...} expands to no argument at all when the variable is
# unset or empty; a plain quoted "$SKIP_LIST_OPT" would pass an empty string
# as a positional argument to clickhouse-test.
clickhouse-test --testname --shard --zookeeper --no-stateless ${SKIP_LIST_OPT:+"$SKIP_LIST_OPT"} $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -66,8 +66,7 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/;
service zookeeper start
@ -105,7 +104,12 @@ LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABL
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
if cat /usr/bin/clickhouse-test | grep -q -- "--use-skip-list"; then
SKIP_LIST_OPT="--use-skip-list"
fi
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse

View File

@ -47,40 +47,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
ln -s /usr/lib/llvm-10/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
mkdir -p /etc/clickhouse-server/dict_examples; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/; \
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/; \
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/polymorphic_parts.xml /etc/clickhouse-server/config.d/; fi; \
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/; fi; \
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/; fi; \
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml; \
service zookeeper start; sleep 5; \
service clickhouse-server start && sleep 5 && clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

61
docker/test/stateless/run.sh Executable file
View File

@ -0,0 +1,61 @@
#!/bin/bash
# Entry point of the stateless functional test image:
#   1. installs the ClickHouse packages found in package_folder/,
#   2. links the test configuration shipped by clickhouse-test into /etc,
#   3. records sanitizer settings in /etc/environment,
#   4. starts ZooKeeper and the server, then runs clickhouse-test.
#
# Inputs (environment): USE_POLYMORPHIC_PARTS, USE_DATABASE_ATOMIC,
# ADDITIONAL_OPTIONS, SKIP_TESTS_OPTION.

set -e -x

dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
dpkg -i package_folder/clickhouse-test_*.deb

# Dictionaries are linked both into dict_examples/ and (further down) into the
# server configuration root.
mkdir -p /etc/clickhouse-server/dict_examples
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/dict_examples/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/dict_examples/

# Server and user configuration overrides used by the tests.
ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/graphite.xml /etc/clickhouse-server/config.d/
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/

# Optional configuration, enabled by setting the variable to 1.
if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then
    ln -s /usr/share/clickhouse-test/config/polymorphic_parts.xml /etc/clickhouse-server/config.d/
fi
if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
    ln -s /usr/share/clickhouse-test/config/database_atomic_configd.xml /etc/clickhouse-server/config.d/
    ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi

# -f: replace a client config that may already exist.
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml

# Sanitizer options and symbolizer locations.
echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment

service zookeeper start
sleep 5
service clickhouse-server start && sleep 5

# Older clickhouse-test packages do not know --use-skip-list; only pass the
# flag when the installed script mentions it.
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
    SKIP_LIST_OPT="--use-skip-list"
fi

# ${VAR:+"$VAR"} expands to nothing at all when the variable is unset or empty.
# A plain "$SKIP_LIST_OPT" would hand clickhouse-test an empty-string argument
# in exactly the case this detection exists for.
# ADDITIONAL_OPTIONS and SKIP_TESTS_OPTION are left unquoted on purpose so that
# they can carry several options.
# Note: the exit status of this pipeline is the one of `tee`.
clickhouse-test --testname --shard --zookeeper ${SKIP_LIST_OPT:+"$SKIP_LIST_OPT"} $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -44,4 +44,11 @@ ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /run.sh
# Sanitizer options
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
ln -s /usr/lib/llvm-10/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
CMD ["/bin/bash", "/run.sh"]

View File

@ -66,8 +66,7 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/; \
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml; \
ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer
ln -sf /usr/share/clickhouse-test/config/client_config.xml /etc/clickhouse-client/config.xml
service zookeeper start
sleep 5
@ -76,7 +75,12 @@ start_clickhouse
sleep 10
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --use-skip-list $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
# Older clickhouse-test packages do not know --use-skip-list; only pass the
# flag when the installed script mentions it.
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
    SKIP_LIST_OPT="--use-skip-list"
fi
# ${VAR:+"$VAR"} expands to nothing when the variable is unset or empty, whereas
# a plain "$SKIP_LIST_OPT" would pass an empty-string argument to clickhouse-test.
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper ${SKIP_LIST_OPT:+"$SKIP_LIST_OPT"} $ADDITIONAL_OPTIONS $SKIP_TESTS_OPTION 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse

View File

@ -33,7 +33,6 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \
echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment; \

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from multiprocessing import cpu_count
from subprocess import Popen, check_call
import os
@ -8,27 +8,39 @@ import argparse
import logging
import time
def get_skip_list_cmd(path):
    """Return '--use-skip-list' if the file at `path` mentions that flag.

    The file is scanned line by line; an empty string is returned when no
    line contains the flag.
    """
    flag = '--use-skip-list'
    with open(path, 'r') as script:
        if any(flag in text for text in script):
            return flag
    return ''
def run_perf_test(cmd, xmls_path, output_folder):
    """Start the performance test command in the background.

    The command line is `cmd` followed by fixed options and `xmls_path`; it is
    run through the shell with stdout and stderr redirected to
    `<output_folder>/perf_stress_run.txt`.

    Returns the `Popen` object of the started process (not waited for).
    """
    output_path = os.path.join(output_folder, "perf_stress_run.txt")
    command = "{} --skip-tags=long --recursive --input-files {}".format(cmd, xmls_path)
    # The child gets its own copy of the descriptor, so the parent's handle
    # can be closed as soon as the process has been spawned.
    with open(output_path, 'w') as f:
        return Popen(command, shell=True, stdout=f, stderr=f)
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option):
    """Start `num_processes` functional test runs in the background.

    `cmd` is both executed and scanned by `get_skip_list_cmd`, so it has to be
    a path to the test script. The first process runs the tests in default
    order, every further one with `--order=random`; process `i` writes stdout
    and stderr to `<output_prefix>/stress_test_run_<i>.txt`.

    Returns the list of started `Popen` objects (not waited for).
    """
    skip_list_opt = get_skip_list_cmd(cmd)
    output_paths = [os.path.join(output_prefix, "stress_test_run_{}.txt".format(i)) for i in range(num_processes)]

    main_command = "{} {} {}".format(cmd, skip_list_opt, skip_tests_option)
    logging.info("Run func tests main cmd '%s'", main_command)
    # The child keeps its own copy of the descriptor; the parent's handle is
    # closed right after the process has been spawned.
    with open(output_paths[0], 'w') as f:
        pipes = [Popen(main_command, shell=True, stdout=f, stderr=f)]

    # Identical for every additional process, so build it once.
    full_command = "{} {} --order=random {}".format(cmd, skip_list_opt, skip_tests_option)
    for output_path in output_paths[1:]:
        # Stagger the start of the additional runs.
        time.sleep(0.5)
        logging.info("Run func tests '%s'", full_command)
        with open(output_path, 'w') as f:
            pipes.append(Popen(full_command, shell=True, stdout=f, stderr=f))
    return pipes
def check_clickhouse_alive(cmd):
try:
logging.info("Checking ClickHouse still alive")
@ -37,10 +49,11 @@ def check_clickhouse_alive(cmd):
except:
return False
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
parser = argparse.ArgumentParser(description="ClickHouse script for running stresstest")
parser.add_argument("--test-cmd", default='clickhouse-test')
parser.add_argument("--test-cmd", default='/usr/bin/clickhouse-test')
parser.add_argument("--skip-func-tests", default='')
parser.add_argument("--client-cmd", default='clickhouse-client')
parser.add_argument("--perf-test-cmd", default='clickhouse-performance-test')

View File

@ -72,5 +72,5 @@ RUN set -x \
VOLUME /var/lib/docker
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "python3 regression.py --no-color --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS} && cat test.log | tfs report results --format json > results.json"]
CMD ["sh", "-c", "python3 regression.py --no-color --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json"]

View File

@ -184,6 +184,42 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod
└────────────────────────────────────────┘
```
### netloc {#netloc}
Extracts the network location (`username:password@host:port`) from a URL.
**Syntax**
```sql
netloc(URL)
```
**Parameters**
- `url` — URL. [String](../../sql-reference/data-types/string.md).
**Returned value**
- `username:password@host:port`.
Type: `String`.
**Example**
Query:
``` sql
SELECT netloc('http://paul@www.example.com:80/');
```
Result:
``` text
┌─netloc('http://paul@www.example.com:80/')─┐
│ paul@www.example.com:80 │
└───────────────────────────────────────────┘
```
## Functions that Remove Part of a URL {#functions-that-remove-part-of-a-url}
If the URL doesn't have anything similar, the URL remains unchanged.

View File

@ -174,6 +174,42 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod
└────────────────────────────────────────┘
```
### netloc {#netloc}
Извлекает сетевую локальность (`username:password@host:port`) из URL.
**Синтаксис**
```sql
netloc(URL)
```
**Параметры**
- `url` — URL. Тип — [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- `username:password@host:port`.
Тип: `String`.
**Пример**
Запрос:
``` sql
SELECT netloc('http://paul@www.example.com:80/');
```
Результат:
``` text
┌─netloc('http://paul@www.example.com:80/')─┐
│ paul@www.example.com:80 │
└───────────────────────────────────────────┘
```
## Функции, удаляющие часть из URL-а {#funktsii-udaliaiushchie-chast-iz-url-a}
Если в URL-е нет ничего похожего, то URL остаётся без изменений.

View File

@ -1,6 +1,7 @@
# Explicit list of the client's source files. The variable is presumably
# consumed by the target definition further down in this file (not visible
# here) — confirm before renaming it.
set (CLICKHOUSE_CLIENT_SOURCES
Client.cpp
ConnectionParameters.cpp
QueryFuzzer.cpp
Suggest.cpp
)

View File

@ -1,5 +1,6 @@
#include "TestHint.h"
#include "ConnectionParameters.h"
#include "QueryFuzzer.h"
#include "Suggest.h"
#if USE_REPLXX
@ -40,6 +41,7 @@
#include <Common/typeid_cast.h>
#include <Common/clearPasswordFromCommandLine.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/PODArray.h>
#include <Core/Types.h>
#include <Core/QueryProcessingStage.h>
#include <Core/ExternalTable.h>
@ -213,6 +215,9 @@ private:
ConnectionParameters connection_parameters;
QueryFuzzer fuzzer;
int query_fuzzer_runs;
void initialize(Poco::Util::Application & self) override
{
Poco::Util::Application::initialize(self);
@ -660,7 +665,14 @@ private:
else
{
query_id = config().getString("query_id", "");
nonInteractive();
if (query_fuzzer_runs)
{
nonInteractiveWithFuzzing();
}
else
{
nonInteractive();
}
/// If exception code isn't zero, we should return non-zero return code anyway.
if (last_exception_received_from_server)
@ -762,6 +774,119 @@ private:
processQueryText(text);
}
/// Non-interactive mode with query fuzzing.
///
/// If the configuration has a "query" key, its raw text is fuzzed and the
/// function returns. Otherwise queries are streamed from stdin: input is
/// accumulated until a ';\n' separator (or a size limit) is reached, passed to
/// processWithFuzzing(), and the part that was parsed is dropped from the
/// accumulated text before reading on.
///
/// Returns early if the server connection is lost; terminates the process
/// with exit(1) if more than 4 KiB of text is left over after a parsing
/// attempt.
void nonInteractiveWithFuzzing()
{
    if (config().has("query"))
    {
        // Poco configuration should not process substitutions in form of
        // ${...} inside query
        processWithFuzzing(config().getRawString("query"));
        return;
    }

    // Try to stream the queries from stdin, without reading all of them
    // into memory. The interface of the parser does not support streaming,
    // in particular, it can't distinguish the end of partial input buffer
    // and the final end of input file. This means we have to try to split
    // the input into separate queries here. Two patterns of input are
    // especially interesting:
    // 1) multiline query:
    //      select 1
    //      from system.numbers;
    //
    // 2) csv insert with in-place data:
    //      insert into t format CSV 1;2
    //
    // (1) means we can't split on new line, and (2) means we can't split on
    // semicolon. Solution: split on ';\n'. This sequence is frequent enough
    // in the SQL tests which are our principal input for fuzzing. Now we
    // have another interesting case:
    // 3) escaped semicolon followed by newline, e.g.
    //      select ';
    //          '
    //
    // To handle (3), parse until we can, and read more data if the parser
    // complains. Hopefully this should be enough...
    ReadBufferFromFileDescriptor in(STDIN_FILENO);
    std::string text;
    while (!in.eof())
    {
        // Read until separator.
        while (!in.eof())
        {
            char * next_separator = find_first_symbols<';'>(in.position(),
                in.buffer().end());

            if (next_separator < in.buffer().end())
            {
                next_separator++;
                if (next_separator < in.buffer().end()
                    && *next_separator == '\n')
                {
                    // Found ';\n', append it to the query text and try to
                    // parse.
                    next_separator++;
                    text.append(in.position(), next_separator - in.position());
                    in.position() = next_separator;
                    break;
                }
            }

            // Either there is no semicolon up to the end of the buffer, or the
            // semicolon found is not followed by a newline inside this buffer.
            // Consume everything up to this point and keep looking.
            text.append(in.position(), next_separator - in.position());
            in.position() = next_separator;

            if (text.size() > 1024 * 1024)
            {
                // We've read a lot of text and still haven't seen a separator.
                // Likely some pathological input, just fall through to prevent
                // too long loops.
                break;
            }
        }

        // Parse and execute what we've read.
        fprintf(stderr, "will now parse '%s'\n", text.c_str());

        const auto * new_end = processWithFuzzing(text);

        if (new_end > &text[0])
        {
            const size_t parsed_size = new_end - &text[0];
            const size_t rest_size = text.size() - parsed_size;

            fprintf(stderr, "total %zu, rest %zu\n", text.size(), rest_size);

            // Drop the parsed prefix. Source and destination are the same
            // string and may overlap, so memcpy() must not be used here;
            // erase() shifts the tail correctly.
            text.erase(0, parsed_size);
        }
        else
        {
            fprintf(stderr, "total %zu, can't parse\n", text.size());
        }

        if (!connection->isConnected())
        {
            // Uh-oh...
            std::cerr << "Lost connection to the server." << std::endl;
            last_exception_received_from_server
                = std::make_unique<Exception>(210, "~");
            return;
        }

        if (text.size() > 4 * 1024)
        {
            // Some pathological situation where the text is larger than 4kB
            // and we still cannot parse a single query in it. Abort.
            std::cerr << "Read too much text and still can't parse a query."
                " Aborting." << std::endl;
            last_exception_received_from_server
                = std::make_unique<Exception>(1, "~");
            // Terminates the whole process rather than returning to the caller.
            exit(1);
        }
    }
}
bool processQueryText(const String & text)
{
if (exit_strings.end() != exit_strings.find(trim(text, [](char c){ return isWhitespaceASCII(c) || c == ';'; })))
@ -769,10 +894,17 @@ private:
if (!config().has("multiquery"))
{
assert(!query_fuzzer_runs);
processTextAsSingleQuery(text);
return true;
}
if (query_fuzzer_runs)
{
processWithFuzzing(text);
return true;
}
return processMultiQuery(text);
}
@ -871,6 +1003,121 @@ private:
}
// Returns the last position we could parse.
const char * processWithFuzzing(const String & text)
{
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
const char * begin = text.data();
const char * end = begin + text.size();
while (begin < end)
{
// Skip whitespace before the query
while (isWhitespaceASCII(*begin) || *begin == ';')
{
++begin;
}
const auto * this_query_begin = begin;
ASTPtr orig_ast = parseQuery(begin, end, true);
if (!orig_ast)
{
// Can't continue after a parsing error
return begin;
}
auto * as_insert = orig_ast->as<ASTInsertQuery>();
if (as_insert && as_insert->data)
{
// INSERT data is ended by newline
as_insert->end = find_first_symbols<'\n'>(as_insert->data, end);
begin = as_insert->end;
}
full_query = text.substr(this_query_begin - text.data(),
begin - text.data());
ASTPtr fuzz_base = orig_ast;
for (int fuzz_step = 0; fuzz_step < query_fuzzer_runs; fuzz_step++)
{
fprintf(stderr, "fuzzing step %d for query at pos %zd\n",
fuzz_step, this_query_begin - text.data());
ASTPtr ast_to_process;
try
{
std::stringstream dump_before_fuzz;
fuzz_base->dumpTree(dump_before_fuzz);
auto base_before_fuzz = fuzz_base->formatForErrorMessage();
ast_to_process = fuzz_base->clone();
fuzzer.fuzzMain(ast_to_process);
auto base_after_fuzz = fuzz_base->formatForErrorMessage();
// Debug AST cloning errors.
if (base_before_fuzz != base_after_fuzz)
{
fprintf(stderr, "base before fuzz: %s\n"
"base after fuzz: %s\n", base_before_fuzz.c_str(),
base_after_fuzz.c_str());
fprintf(stderr, "dump before fuzz:\n%s\n",
dump_before_fuzz.str().c_str());
fprintf(stderr, "dump after fuzz:\n");
fuzz_base->dumpTree(std::cerr);
assert(false);
}
auto fuzzed_text = ast_to_process->formatForErrorMessage();
if (fuzz_step > 0 && fuzzed_text == base_before_fuzz)
{
fprintf(stderr, "got boring ast\n");
continue;
}
parsed_query = ast_to_process;
query_to_send = parsed_query->formatForErrorMessage();
processParsedSingleQuery();
}
catch (...)
{
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
received_exception_from_server = true;
std::cerr << "Error on processing query: " << ast_to_process->formatForErrorMessage() << std::endl << last_exception_received_from_server->message();
}
if (received_exception_from_server)
{
// Query completed with error, ignore it and fuzz again.
fprintf(stderr, "Got error, will fuzz again\n");
received_exception_from_server = false;
last_exception_received_from_server.reset();
continue;
}
else if (ast_to_process->formatForErrorMessage().size() > 500)
{
// ast too long, start from original ast
fprintf(stderr, "current ast too long, won't elaborate\n");
fuzz_base = orig_ast;
}
else
{
// fuzz starting from this successful query
fprintf(stderr, "using this ast as etalon\n");
fuzz_base = ast_to_process;
}
}
}
return begin;
}
void processTextAsSingleQuery(const String & text_)
{
full_query = text_;
@ -906,6 +1153,7 @@ private:
void processParsedSingleQuery()
{
resetOutput();
last_exception_received_from_server.reset();
received_exception_from_server = false;
if (echo_queries)
@ -1537,8 +1785,11 @@ private:
processed_rows += block.rows();
initBlockOutputStream(block);
/// The header block containing zero rows was used to initialize block_out_stream, do not output it.
if (block.rows() != 0)
/// The header block containing zero rows was used to initialize
/// block_out_stream, do not output it.
/// Also do not output too much data if we're fuzzing.
if (block.rows() != 0
&& (query_fuzzer_runs == 0 || processed_rows < 100))
{
block_out_stream->write(block);
written_first_block = true;
@ -1895,6 +2146,7 @@ public:
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
("log-level", po::value<std::string>(), "client log level")
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
("query-fuzzer-runs", po::value<int>()->default_value(0), "query fuzzer runs")
;
Settings cmd_settings;
@ -2052,6 +2304,17 @@ public:
if (options.count("highlight"))
config().setBool("highlight", options["highlight"].as<bool>());
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
{
// Fuzzer implies multiquery.
config().setBool("multiquery", true);
// Ignore errors in parsing queries.
// TODO stop using parseQuery.
config().setBool("ignore-error", true);
ignore_error = true;
}
argsToConfig(common_arguments, config(), 100);
clearPasswordFromCommandLine(argc, argv);

View File

@ -0,0 +1,421 @@
#include "QueryFuzzer.h"
#include <unordered_set>
#include <pcg_random.hpp>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <Core/Types.h>
#include <IO/Operators.h>
#include <IO/UseSSL.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
namespace DB
{
/// Returns a random "interesting" constant of the requested kind:
///   0 - integer, 1 - floating point, 2 - decimal with a random scale.
/// Any other `type` trips an assertion and yields Null.
Field QueryFuzzer::getRandomField(int type)
{
    if (type == 0)
    {
        static constexpr Int64 values[]
            = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024,
               1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024,
               1024 * 1024 + 1, INT64_MIN, INT64_MAX};
        const auto index = fuzz_rand() % (sizeof(values) / sizeof(*values));
        return values[index];
    }

    if (type == 1)
    {
        static constexpr float values[]
            = {NAN, INFINITY, -INFINITY, 0., 0.0001, 0.5, 0.9999,
               1., 1.0001, 2., 10.0001, 100.0001, 1000.0001};
        const auto index = fuzz_rand() % (sizeof(values) / sizeof(*values));
        return values[index];
    }

    if (type == 2)
    {
        static constexpr Int64 values[]
            = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024,
               1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024,
               1024 * 1024 + 1, INT64_MIN, INT64_MAX};
        static constexpr UInt64 scales[] = {0, 1, 2, 10};

        // One random draw for the value and one for the scale.
        const auto value_index = fuzz_rand() % (sizeof(values) / sizeof(*values));
        const auto scale_index = fuzz_rand() % (sizeof(scales) / sizeof(*scales));
        return DecimalField<Decimal64>(values[value_index], scales[scale_index]);
    }

    assert(false);
    return Null{};
}
/// Returns a randomly modified copy of `field`.
///
///  - With probability 1/20 the result is Null, whatever the input type.
///  - Integer, float and decimal fields are replaced by a random constant from
///    getRandomField(), usually of the same kind, occasionally of another one.
///  - Strings are sometimes emptied, repeated, or get one byte zeroed.
///  - Arrays and tuples may lose an element, gain an element, and have every
///    element fuzzed recursively.
///  - Fields of any other type are returned unchanged.
Field QueryFuzzer::fuzzField(Field field)
{
    const auto type = field.getType();

    // Index understood by getRandomField(), or -1 for non-numeric fields.
    int type_index = -1;

    if (type == Field::Types::Int64
        || type == Field::Types::UInt64)
    {
        type_index = 0;
    }
    else if (type == Field::Types::Float64)
    {
        type_index = 1;
    }
    else if (type == Field::Types::Decimal32
        || type == Field::Types::Decimal64
        || type == Field::Types::Decimal128)
    {
        type_index = 2;
    }

    if (fuzz_rand() % 20 == 0)
    {
        return Null{};
    }

    if (type_index >= 0)
    {
        if (fuzz_rand() % 20 == 0)
        {
            // Change type sometimes, but not often, because it mostly leads to
            // boring errors.
            type_index = fuzz_rand() % 3;
        }
        return getRandomField(type_index);
    }

    if (type == Field::Types::String)
    {
        auto & str = field.get<std::string>();
        UInt64 action = fuzz_rand() % 10;
        switch (action)
        {
            case 0:
                str = "";
                break;
            case 1:
                str = str + str;
                break;
            case 2:
                str = str + str + str + str;
                break;
            // There is no case 3: that value falls through to "do nothing",
            // like 5..9.
            case 4:
                if (!str.empty())
                {
                    str[fuzz_rand() % str.size()] = '\0';
                }
                break;
            default:
                // Do nothing
                break;
        }
    }
    else if (type == Field::Types::Array || type == Field::Types::Tuple)
    {
        auto & arr = field.reinterpret<FieldVector>();

        if (fuzz_rand() % 5 == 0 && !arr.empty())
        {
            size_t pos = fuzz_rand() % arr.size();
            arr.erase(arr.begin() + pos);
            fprintf(stderr, "erased\n");
        }

        if (fuzz_rand() % 5 == 0)
        {
            if (!arr.empty())
            {
                // Insert a fuzzed copy of an existing element next to it.
                size_t pos = fuzz_rand() % arr.size();
                arr.insert(arr.begin() + pos, fuzzField(arr[pos]));

                // size_t is unsigned, so the matching specifier is %zu.
                fprintf(stderr, "inserted (pos %zu)\n", pos);
            }
            else
            {
                arr.insert(arr.begin(), getRandomField(0));

                fprintf(stderr, "inserted (0)\n");
            }
        }

        for (auto & element : arr)
        {
            element = fuzzField(element);
        }
    }

    return field;
}
/// Returns a clone of a randomly chosen remembered column-like expression with
/// its alias cleared, or nullptr if nothing has been remembered yet.
ASTPtr QueryFuzzer::getRandomColumnLike()
{
    if (!column_like.empty())
    {
        const auto index = fuzz_rand() % column_like.size();
        ASTPtr copy = column_like[index]->clone();
        copy->setAlias("");
        return copy;
    }

    return nullptr;
}
/// Replaces `ast` with a random remembered column-like expression, keeping the
/// alias of the node being replaced. Does nothing if none is remembered.
void QueryFuzzer::replaceWithColumnLike(ASTPtr & ast)
{
    if (!column_like.empty())
    {
        const std::string alias_to_keep = ast->tryGetAlias();
        ast = getRandomColumnLike();
        ast->setAlias(alias_to_keep);
    }
}
/// Replaces `ast` with a clone of a random remembered table-like expression,
/// keeping the alias of the node being replaced. Does nothing if none is
/// remembered.
void QueryFuzzer::replaceWithTableLike(ASTPtr & ast)
{
    if (!table_like.empty())
    {
        const std::string alias_to_keep = ast->tryGetAlias();

        const auto index = fuzz_rand() % table_like.size();
        ASTPtr replacement = table_like[index]->clone();
        replacement->setAlias(alias_to_keep);

        ast = replacement;
    }
}
/// Randomly edits an expression list: with probability 1/50 removes one
/// element (never the only one), and with probability 1/50 inserts a random
/// remembered column-like expression at a random position.
/// `ast` must be an ASTExpressionList or null; null is ignored.
void QueryFuzzer::fuzzColumnLikeExpressionList(ASTPtr ast)
{
    if (!ast)
        return;

    auto & items = assert_cast<ASTExpressionList *>(ast.get())->children;

    // The random draw is made even when the list is too short to shrink.
    const bool want_erase = fuzz_rand() % 50 == 0;
    if (want_erase && items.size() > 1)
    {
        // Don't remove last element -- this leads to questionable
        // constructs such as empty select.
        const auto victim = fuzz_rand() % items.size();
        items.erase(items.begin() + victim);
    }

    if (fuzz_rand() % 50 != 0)
        return;

    auto insert_at = items.begin();
    if (!items.empty())
        insert_at += fuzz_rand() % items.size();

    if (auto col = getRandomColumnLike())
        items.insert(insert_at, col);
    else
        fprintf(stderr, "no random col!\n");
}
/// Fuzzes every node of the list in order.
void QueryFuzzer::fuzz(ASTs & asts)
{
    for (auto it = asts.begin(); it != asts.end(); ++it)
        fuzz(*it);
}
/// Recursively fuzzes one AST node, dispatching on its concrete type.
/// Literals are where the tree actually changes (value fuzzing or replacement
/// with a column-like expression); functions and selects additionally get
/// their expression lists edited; everything else just recurses.
/// A null `ast` is ignored.
void QueryFuzzer::fuzz(ASTPtr & ast)
{
    if (!ast)
        return;

    auto * node = ast.get();

    if (auto * with_union = typeid_cast<ASTSelectWithUnionQuery *>(node))
    {
        fuzz(with_union->list_of_selects);
        return;
    }

    if (auto * tables = typeid_cast<ASTTablesInSelectQuery *>(node))
    {
        fuzz(tables->children);
        return;
    }

    if (auto * tables_element = typeid_cast<ASTTablesInSelectQueryElement *>(node))
    {
        fuzz(tables_element->table_join);
        fuzz(tables_element->table_expression);
        fuzz(tables_element->array_join);
        return;
    }

    if (auto * table_expr = typeid_cast<ASTTableExpression *>(node))
    {
        fuzz(table_expr->database_and_table_name);
        fuzz(table_expr->subquery);
        fuzz(table_expr->table_function);
        return;
    }

    if (auto * expr_list = typeid_cast<ASTExpressionList *>(node))
    {
        fuzz(expr_list->children);
        return;
    }

    if (auto * fn = typeid_cast<ASTFunction *>(node))
    {
        fuzzColumnLikeExpressionList(fn->arguments);
        fuzzColumnLikeExpressionList(fn->parameters);

        fuzz(fn->children);
        return;
    }

    if (auto * select = typeid_cast<ASTSelectQuery *>(node))
    {
        fuzzColumnLikeExpressionList(select->select());
        fuzzColumnLikeExpressionList(select->groupBy());

        fuzz(select->children);
        return;
    }

    if (auto * literal = typeid_cast<ASTLiteral *>(node))
    {
        // Only change the queries sometimes.
        switch (fuzz_rand() % 10)
        {
            case 0:
                literal->value = fuzzField(literal->value);
                break;
            case 1:
                /* replace with a random function? */
                break;
            case 2:
                /* replace with something column-like */
                replaceWithColumnLike(ast);
                break;
            default:
                break;
        }
        return;
    }

    fuzz(ast->children);
}
/*
 * This function collects various parts of a query that we can then substitute
 * into a query being fuzzed.
 *
 * TODO: we just stop remembering new parts after our corpus reaches certain size.
 * This is boring, should implement a random replacement of existing parts with
 * small probability. Do this after we add this fuzzer to CI and fix all the
 * problems it can routinely find even in this boring version.
 */
void QueryFuzzer::collectFuzzInfoMain(const ASTPtr ast)
{
    collectFuzzInfoRecurse(ast);

    // Rebuild the flat vectors from the deduplicating containers, so that a
    // random element can be picked by index.
    aliases.assign(aliases_set.begin(), aliases_set.end());

    column_like.clear();
    for (const auto & entry : column_like_map)
        column_like.push_back(entry.second);

    table_like.clear();
    for (const auto & entry : table_like_map)
        table_like.push_back(entry.second);
}
/// Remembers `ast` as a table-like expression, keyed by its formatted text.
/// Nothing is added once the map holds more than 1000 entries, or when the
/// formatted text is 200 characters or longer.
void QueryFuzzer::addTableLike(const ASTPtr ast)
{
    if (table_like_map.size() <= 1000)
    {
        const auto key = ast->formatForErrorMessage();
        if (key.size() < 200)
            table_like_map.insert({key, ast});
    }
}
/// Remembers `ast` as a column-like expression, keyed by its formatted text.
/// Nothing is added once the map holds more than 1000 entries, or when the
/// formatted text is 200 characters or longer.
void QueryFuzzer::addColumnLike(const ASTPtr ast)
{
    if (column_like_map.size() <= 1000)
    {
        const auto key = ast->formatForErrorMessage();
        if (key.size() < 200)
            column_like_map.insert({key, ast});
    }
}
void QueryFuzzer::collectFuzzInfoRecurse(const ASTPtr ast)
{
if (auto * impl = dynamic_cast<ASTWithAlias *>(ast.get()))
{
if (aliases_set.size() < 1000)
{
aliases_set.insert(impl->alias);
}
}
if (typeid_cast<ASTLiteral *>(ast.get()))
{
addColumnLike(ast);
}
else if (typeid_cast<ASTIdentifier *>(ast.get()))
{
addColumnLike(ast);
}
else if (typeid_cast<ASTFunction *>(ast.get()))
{
addColumnLike(ast);
}
else if (typeid_cast<ASTTableExpression *>(ast.get()))
{
addTableLike(ast);
}
else if (typeid_cast<ASTSubquery *>(ast.get()))
{
addTableLike(ast);
}
for (const auto & child : ast->children)
{
collectFuzzInfoRecurse(child);
}
}
/// Entry point: remembers reusable parts of `ast`, fuzzes it in place, and
/// prints the resulting query to stdout surrounded by blank lines.
void QueryFuzzer::fuzzMain(ASTPtr & ast)
{
    collectFuzzInfoMain(ast);
    fuzz(ast);

    auto & out = std::cout;
    out << std::endl;
    formatAST(*ast, out);
    out << std::endl;
    out << std::endl;
}
}

View File

@ -0,0 +1,58 @@
#pragma once
#include <unordered_set>
#include <unordered_map>
#include <vector>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
#include <Core/Field.h>
#include <Parsers/IAST.h>
namespace DB
{
/*
* This is an AST-based query fuzzer that makes random modifications to query
* AST, changing numbers, list of columns, functions, etc. It remembers part of
* queries it fuzzed previously, and can substitute these parts to new fuzzed
* queries, so you want to feed it a lot of queries to get some interesting mix
* of them. Normally we feed SQL regression tests to it.
*/
struct QueryFuzzer
{
// Source of all random decisions; seeded from randomSeed(), so runs are not
// reproducible.
pcg64 fuzz_rand{randomSeed()};
// These arrays hold parts of queries that we can substitute into the query
// we are currently fuzzing. We add some part from each new query we are asked
// to fuzz, and keep this state between queries, so the fuzzing output becomes
// more interesting over time, as the queries mix.
// The set and the maps deduplicate what was collected (the maps are keyed by
// the formatted text of the AST); the vectors next to them are rebuilt from
// them by collectFuzzInfoMain() so that an element can be picked by index.
std::unordered_set<std::string> aliases_set;
std::vector<std::string> aliases;
std::unordered_map<std::string, ASTPtr> column_like_map;
std::vector<ASTPtr> column_like;
std::unordered_map<std::string, ASTPtr> table_like_map;
std::vector<ASTPtr> table_like;
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes. It also prints the
// resulting query to stdout.
void fuzzMain(ASTPtr & ast);
// Various helper functions follow, normally you shouldn't have to call them.
// Random constant: type 0 - integer, 1 - float, 2 - decimal.
Field getRandomField(int type);
// Returns a randomly modified copy of the field.
Field fuzzField(Field field);
// Clone of a random remembered column-like expression, or nullptr if none.
ASTPtr getRandomColumnLike();
// Replace the node with a remembered expression, keeping the node's alias.
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
// Randomly removes/inserts elements of an ASTExpressionList (null is ignored).
void fuzzColumnLikeExpressionList(ASTPtr ast);
// Recursive fuzzing of a list of nodes / of a single node.
void fuzz(ASTs & asts);
void fuzz(ASTPtr & ast);
// Collect reusable parts of the query into the containers above.
void collectFuzzInfoMain(const ASTPtr ast);
void addTableLike(const ASTPtr ast);
void addColumnLike(const ASTPtr ast);
void collectFuzzInfoRecurse(const ASTPtr ast);
};
}

View File

@ -169,17 +169,6 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast)
}
/// Joins `names` into a single string, separating the elements with ", ".
/// An empty list produces an empty string.
String createCommaSeparatedStringFrom(const Names & names)
{
    std::ostringstream out;
    const char * separator = "";
    for (const auto & name : names)
    {
        out << separator << name;
        separator = ", ";
    }
    return out.str();
}
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
const auto sorting_key_ast = extractOrderBy(storage_ast);

View File

@ -40,6 +40,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/NestedUtils.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h>
@ -197,8 +198,6 @@ ASTPtr extractPrimaryKey(const ASTPtr & storage_ast);
ASTPtr extractOrderBy(const ASTPtr & storage_ast);
String createCommaSeparatedStringFrom(const Names & names);
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
String extractReplicatedTableZookeeperPath(const ASTPtr & storage_ast);

View File

@ -268,7 +268,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
ParserStorage parser_storage;
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
primary_key_comma_separated = createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
primary_key_comma_separated = Nested::createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
engine_push_zk_path = extractReplicatedTableZookeeperPath(engine_push_ast);
}

View File

@ -17,6 +17,7 @@ SRCS(
main.cpp
client/Client.cpp
client/QueryFuzzer.cpp
client/ConnectionParameters.cpp
client/Suggest.cpp
extract-from-config/ExtractFromConfig.cpp

View File

@ -373,11 +373,8 @@ target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_C
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR})
if (USE_ORC)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
configure_file (
"${ORC_INCLUDE_DIR}/orc/orc-config.hh.in"
"${ORC_INCLUDE_DIR}/orc/orc-config.hh"
)
dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/orc/c++/include)
endif ()
if (ENABLE_TESTS AND USE_GTEST)

View File

@ -170,6 +170,8 @@ public:
/// If not connected yet, or if connection is broken - then connect. If cannot connect - throw an exception.
void forceConnected(const ConnectionTimeouts & timeouts);
bool isConnected() const { return connected; }
TablesStatusResponse getTablesStatus(const ConnectionTimeouts & timeouts,
const TablesStatusRequest & request);

View File

@ -88,35 +88,35 @@ void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr column)
{
/** If the aggregate function returns an unfinalized/unfinished state,
* then you just need to copy pointers to it and also shared ownership of data.
*
* Also replace the aggregate function with the nested function.
* That is, if this column is the states of the aggregate function `aggState`,
* then we return the same column, but with the states of the aggregate function `agg`.
* These are the same states, changing only the function to which they correspond.
*
* Further is quite difficult to understand.
* Example when this happens:
*
* SELECT k, finalizeAggregation(quantileTimingState(0.5)(x)) FROM ... GROUP BY k WITH TOTALS
*
* This calculates the aggregate function `quantileTimingState`.
* Its return type AggregateFunction(quantileTiming(0.5), UInt64)`.
* Due to the presence of WITH TOTALS, during aggregation the states of this aggregate function will be stored
* in the ColumnAggregateFunction column of type
* AggregateFunction(quantileTimingState(0.5), UInt64).
* Then, in `TotalsHavingTransform`, it will be called `convertToValues` method,
* to get the "ready" values.
* But it just converts a column of type
* `AggregateFunction(quantileTimingState(0.5), UInt64)`
* into `AggregateFunction(quantileTiming(0.5), UInt64)`
* - in the same states.
*column_aggregate_func
* Then `finalizeAggregation` function will be calculated, which will call `convertToValues` already on the result.
* And this converts a column of type
* AggregateFunction(quantileTiming(0.5), UInt64)
* into UInt16 - already finished result of `quantileTiming`.
*/
* then you just need to copy pointers to it and also shared ownership of data.
*
* Also replace the aggregate function with the nested function.
* That is, if this column is the states of the aggregate function `aggState`,
* then we return the same column, but with the states of the aggregate function `agg`.
* These are the same states, changing only the function to which they correspond.
*
* Further is quite difficult to understand.
* Example when this happens:
*
* SELECT k, finalizeAggregation(quantileTimingState(0.5)(x)) FROM ... GROUP BY k WITH TOTALS
*
* This calculates the aggregate function `quantileTimingState`.
* Its return type is `AggregateFunction(quantileTiming(0.5), UInt64)`.
* Due to the presence of WITH TOTALS, during aggregation the states of this aggregate function will be stored
* in the ColumnAggregateFunction column of type
* AggregateFunction(quantileTimingState(0.5), UInt64).
* Then, in `TotalsHavingTransform`, it will be called `convertToValues` method,
* to get the "ready" values.
* But it just converts a column of type
* `AggregateFunction(quantileTimingState(0.5), UInt64)`
* into `AggregateFunction(quantileTiming(0.5), UInt64)`
* - in the same states.
*
* Then `finalizeAggregation` function will be calculated, which will call `convertToValues` already on the result.
* And this converts a column of type
* AggregateFunction(quantileTiming(0.5), UInt64)
* into UInt16 - already finished result of `quantileTiming`.
*/
auto & column_aggregate_func = assert_cast<ColumnAggregateFunction &>(*column);
auto & func = column_aggregate_func.func;
auto & data = column_aggregate_func.data;
@ -355,6 +355,13 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) const
}
}
/// Feeds the whole column into `hash`, one row after another.
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
{
    /// There is no faster bulk path for this column, so every row goes through the per-element routine.
    const size_t rows = size();
    for (size_t row = 0; row != rows; ++row)
        updateHashWithValue(row, hash);
}
/// The returned size is less than real size. The reason is that some parts of
/// aggregate function data may be allocated on shared arenas. These arenas are
/// used for several blocks, and also may be updated concurrently from other

View File

@ -159,6 +159,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;

View File

@ -259,6 +259,12 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const
}
}
/// Updates `hash` with the whole column: the offsets column is hashed first, then the nested data column.
void ColumnArray::updateHashFast(SipHash & hash) const
{
offsets->updateHashFast(hash);
data->updateHashFast(hash);
}
void ColumnArray::insert(const Field & x)
{
const Array & array = DB::get<const Array &>(x);

View File

@ -63,6 +63,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insert(const Field & x) override;
void insertFrom(const IColumn & src_, size_t n) override;

View File

@ -170,6 +170,11 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
/// Updates `hash` by delegating to the nested `data` column.
/// NOTE(review): only `data` is hashed here; presumably the row count of the constant column is
/// deliberately left out of the hash - confirm with callers.
void updateHashFast(SipHash & hash) const override
{
data->updateHashFast(hash);
}
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -98,6 +98,12 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
}
}
/// Hashes the whole column in one call by feeding the raw bytes of the value buffer into `hash`.
template <typename T>
void ColumnDecimal<T>::updateHashFast(SipHash & hash) const
{
    const auto * raw_bytes = reinterpret_cast<const char *>(data.data());
    const auto byte_count = size() * sizeof(data[0]);
    hash.update(raw_bytes, byte_count);
}
template <typename T>
void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const
{

View File

@ -107,6 +107,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,

View File

@ -124,6 +124,12 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const
}
}
/// Hashes the whole column in one go: the fixed width `n` first, then the raw character bytes.
void ColumnFixedString::updateHashFast(SipHash & hash) const
{
    hash.update(n);

    /// size() rows of n bytes each.
    const auto * raw_bytes = reinterpret_cast<const char *>(chars.data());
    const auto byte_count = size() * n;
    hash.update(raw_bytes, byte_count);
}
template <bool positive>
struct ColumnFixedString::less
{

View File

@ -111,6 +111,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
{
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);

View File

@ -107,6 +107,11 @@ public:
throw Exception("updateWeakHash32 is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Whole-column hashing is not supported for this column type: always throws NOT_IMPLEMENTED,
/// with the column name (from getName()) included in the message.
void updateHashFast(SipHash &) const override
{
throw Exception("updateHashFast is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
void popBack(size_t) override
{
throw Exception("popBack is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);

View File

@ -257,6 +257,12 @@ void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const
idx.updateWeakHash(hash, dict_hash);
}
/// Updates `hash` with the whole column: first the positions column of the index,
/// then the nested column of the dictionary.
void ColumnLowCardinality::updateHashFast(SipHash & hash) const
{
idx.getPositions()->updateHashFast(hash);
getDictionary().getNestedColumn()->updateHashFast(hash);
}
void ColumnLowCardinality::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);

View File

@ -93,6 +93,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash &) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));

View File

@ -64,6 +64,12 @@ void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const
hash_data[row] = old_hash_data[row];
}
/// Updates `hash` with the whole column: the null map is hashed first, then the nested column.
void ColumnNullable::updateHashFast(SipHash & hash) const
{
null_map->updateHashFast(hash);
nested_column->updateHashFast(hash);
}
MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const
{
MutableColumnPtr new_nested_col = getNestedColumn().cloneResized(new_size);

View File

@ -90,6 +90,7 @@ public:
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override

View File

@ -191,6 +191,12 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
/// Updates `hash` with the whole column: first the offsets buffer, then the character buffer.
void updateHashFast(SipHash & hash) const override
{
    /// Each buffer is hashed over its own full length. The character buffer holds the bytes of
    /// all strings, so its length is chars.size(); deriving it from the number of rows would
    /// hash only a prefix of the data and let columns with different contents collide.
    hash.update(reinterpret_cast<const char *>(offsets.data()), offsets.size() * sizeof(offsets[0]));
    hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;

View File

@ -195,6 +195,12 @@ void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const
column->updateWeakHash32(hash);
}
/// Updates `hash` with the whole tuple by hashing every element column in order.
void ColumnTuple::updateHashFast(SipHash & hash) const
{
    for (auto it = columns.begin(); it != columns.end(); ++it)
        (*it)->updateHashFast(hash);
}
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const size_t tuple_size = columns.size();

View File

@ -63,6 +63,7 @@ public:
const char * deserializeAndInsertFromArena(const char * pos) override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -86,6 +86,12 @@ void ColumnVector<T>::updateWeakHash32(WeakHash32 & hash) const
}
}
/// Hashes the whole column in one call by feeding the raw bytes of the value buffer into `hash`.
template <typename T>
void ColumnVector<T>::updateHashFast(SipHash & hash) const
{
    const auto * raw_bytes = reinterpret_cast<const char *>(data.data());
    const auto byte_count = size() * sizeof(data[0]);
    hash.update(raw_bytes, byte_count);
}
template <typename T>
struct ColumnVector<T>::less
{

View File

@ -162,6 +162,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
size_t byteSize() const override
{
return data.size() * sizeof(data[0]);

View File

@ -216,6 +216,9 @@ public:
/// WeakHash32 must have the same size as column.
virtual void updateWeakHash32(WeakHash32 & hash) const = 0;
/// Update the hash state with the contents of the whole column.
virtual void updateHashFast(SipHash & hash) const = 0;
/** Removes elements that don't match the filter.
* Is used in WHERE and HAVING operations.
* If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column;

View File

@ -72,6 +72,10 @@ public:
{
}
/// Intentionally a no-op: nothing is fed into the hash for this column.
void updateHashFast(SipHash & /*hash*/) const override
{
}
void insertFrom(const IColumn &, size_t) override
{
++s;

View File

@ -143,6 +143,11 @@ public:
throw Exception("Method updateWeakHash32 is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
/// Whole-column hashing is not supported for ColumnUnique: always throws NOT_IMPLEMENTED.
void updateHashFast(SipHash &) const override
{
throw Exception("Method updateHashFast is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throw Exception("Method compareColumn is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -56,7 +56,7 @@ void checkColumn(
{
if (it->second != hash[i])
{
std::cout << "Different hashes for the same equivalent class (" << val << "):\n";
std::cout << "Different hashes for the same equivalent class (" << size_t(val) << "):\n";
std::cout << print_for_row(it->first) << '\n';
std::cout << print_for_row(i) << std::endl;
}

View File

@ -269,6 +269,8 @@ bool DNSResolver::updateCache()
LOG_DEBUG(log, "Updating DNS cache");
{
String updated_host_name = Poco::Net::DNS::hostName();
std::lock_guard lock(impl->drop_mutex);
for (const auto & host : impl->new_hosts)
@ -279,7 +281,7 @@ bool DNSResolver::updateCache()
impl->known_addresses.insert(address);
impl->new_addresses.clear();
impl->host_name.emplace(Poco::Net::DNS::hostName());
impl->host_name.emplace(updated_host_name);
}
/// FIXME Updating may take a long time because we cannot manage timeouts of getaddrinfo(...) and getnameinfo(...).

View File

@ -497,7 +497,8 @@ namespace ErrorCodes
extern const int CASSANDRA_INTERNAL_ERROR = 528;
extern const int NOT_A_LEADER = 529;
extern const int CANNOT_CONNECT_RABBITMQ = 530;
extern const int LDAP_ERROR = 531;
extern const int CANNOT_FSTAT = 531;
extern const int LDAP_ERROR = 532;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -77,6 +77,21 @@ void MemoryTracker::alloc(Int64 size)
Int64 current_hard_limit = hard_limit.load(std::memory_order_relaxed);
Int64 current_profiler_limit = profiler_limit.load(std::memory_order_relaxed);
/// Cap the limit to the total_memory_tracker, since it may include some drift.
///
/// And since total_memory_tracker is reset to the process resident
/// memory periodically (in AsynchronousMetrics::update()), any limit can be
/// capped to it, to avoid possible drift.
if (unlikely(current_hard_limit && will_be > current_hard_limit))
{
Int64 total_amount = total_memory_tracker.get();
if (amount > total_amount)
{
set(total_amount);
will_be = size + total_amount;
}
}
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)))
{

View File

@ -44,3 +44,25 @@ TEST(Common, PODPushBackRawMany)
EXPECT_EQ(15, chars.size());
EXPECT_EQ(std::string("first0123456789"), std::string(chars.data(), chars.size()));
}
TEST(Common, PODNoOverallocation)
{
    /// Because of padding, every capacity PaddedPODArray grows to is a bit smaller than a power of two.
    /// Append one million elements and record each distinct capacity on the way.
    /// NOTE: It's Ok to change these numbers if you will modify initial size or padding.

    PaddedPODArray<char> chars;
    std::vector<size_t> observed_capacities;

    size_t last_capacity = 0;
    size_t inserted = 0;
    while (inserted < 1000000)
    {
        chars.emplace_back();
        ++inserted;

        const size_t capacity_now = chars.capacity();
        if (capacity_now == last_capacity)
            continue;

        last_capacity = capacity_now;
        observed_capacities.push_back(capacity_now);
    }

    const std::vector<size_t> expected_capacities{4065, 8161, 16353, 32737, 65505, 131041, 262113, 524257, 1048545};
    EXPECT_EQ(observed_capacities, expected_capacities);
}

View File

@ -11,7 +11,7 @@
#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <boost/format.hpp>
#include <fmt/format.h>
#include <bitset>
#include <cmath>
@ -63,6 +63,10 @@ std::vector<T> operator+(std::vector<T> && left, std::vector<T> && right)
namespace
{
/// Replacement for the deprecated std::is_pod: a type qualifies only if it is both trivial and standard-layout.
/// Both traits are evaluated on T itself; nesting one trait inside the other would inspect the
/// trait class instead of T and accept every type.
template <class T> using is_pod = std::conjunction<std::is_trivial<T>, std::is_standard_layout<T>>;
template <class T> inline constexpr bool is_pod_v = is_pod<T>::value;
template <typename T>
struct AsHexStringHelper
{
@ -84,7 +88,7 @@ std::ostream & operator << (std::ostream & ostr, const AsHexStringHelper<T> & he
template <typename T>
AsHexStringHelper<T> AsHexString(const T & container)
{
static_assert (sizeof(container[0]) == 1 && std::is_pod<std::decay_t<decltype(container[0])>>::value, "Only works on containers of byte-size PODs.");
static_assert (sizeof(container[0]) == 1 && is_pod_v<std::decay_t<decltype(container[0])>>, "Only works on containers of byte-size PODs.");
return AsHexStringHelper<T>{container};
}
@ -162,7 +166,7 @@ public:
data_end(container.data() + container.size()),
current_value(T{})
{
static_assert(sizeof(container[0]) == 1 && std::is_pod<std::decay_t<decltype(container[0])>>::value, "Only works on containers of byte-size PODs.");
static_assert(sizeof(container[0]) == 1 && is_pod_v<std::decay_t<decltype(container[0])>>, "Only works on containers of byte-size PODs.");
read();
}
@ -252,8 +256,8 @@ template <typename T, typename ContainerLeft, typename ContainerRight>
if (++mismatching_items <= MAX_MISMATCHING_ITEMS)
{
result << "\nmismatching " << sizeof(T) << "-byte item #" << i
<< "\nexpected: " << bin(left_value) << " (0x" << std::hex << left_value << ")"
<< "\ngot : " << bin(right_value) << " (0x" << std::hex << right_value << ")";
<< "\nexpected: " << bin(left_value) << " (0x" << std::hex << size_t(left_value) << ")"
<< "\ngot : " << bin(right_value) << " (0x" << std::hex << size_t(right_value) << ")";
if (mismatching_items == MAX_MISMATCHING_ITEMS)
{
result << "\n..." << std::endl;
@ -386,7 +390,7 @@ CodecTestSequence makeSeq(Args && ... args)
}
return CodecTestSequence{
(boost::format("%1% values of %2%") % std::size(vals) % type_name<T>()).str(),
(fmt::format("{} values of {}", std::size(vals), type_name<T>())),
std::move(data),
makeDataType<T>()
};
@ -408,7 +412,7 @@ CodecTestSequence generateSeq(Generator gen, const char* gen_name, B Begin = 0,
}
return CodecTestSequence{
(boost::format("%1% values of %2% from %3%") % (End - Begin) % type_name<T>() % gen_name).str(),
(fmt::format("{} values of {} from {}", (End - Begin), type_name<T>(), gen_name)),
std::move(data),
makeDataType<T>()
};
@ -652,6 +656,7 @@ TEST_P(CodecTestPerformance, TranscodingWithDataType)
std::cerr << std::endl;
}
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CodecTestPerformance);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Here we use generators to produce test payload for codecs.

View File

@ -96,6 +96,8 @@ struct Settings : public SettingsCollection<Settings>
\
M(SettingBool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \
\
M(SettingMilliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \
\
M(SettingUInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \
M(SettingUInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 0 - wait unlimited time.", 0) \
\
@ -367,7 +369,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(SettingBool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
\

View File

@ -0,0 +1,41 @@
#include <Core/SortDescription.h>
#include <Core/Block.h>
#include <IO/Operators.h>
namespace DB
{
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out)
{
bool first = true;
for (const auto & desc : description)
{
if (!first)
out << ", ";
first = false;
if (!desc.column_name.empty())
out << desc.column_name;
else
{
if (desc.column_number < header.columns())
out << header.getByPosition(desc.column_number).name;
else
out << "?";
out << " (pos " << desc.column_number << ")";
}
if (desc.direction > 0)
out << " ASC";
else
out << " DESC";
if (desc.with_fill)
out << " WITH FILL";
}
}
}

View File

@ -71,4 +71,9 @@ struct SortColumnDescription
/// Description of the sorting rule for several columns.
using SortDescription = std::vector<SortColumnDescription>;
class Block;
/// Outputs user-readable description into `out`.
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out);
}

View File

@ -20,6 +20,7 @@ SRCS(
NamesAndTypes.cpp
Settings.cpp
SettingsCollection.cpp
SortDescription.cpp
)
END()

View File

@ -70,6 +70,17 @@ std::pair<std::string, std::string> splitName(const std::string & name)
return {{ begin, first_end }, { second_begin, end }};
}
std::string createCommaSeparatedStringFrom(const Names & names)
{
std::ostringstream ss;
if (!names.empty())
{
std::copy(names.begin(), std::prev(names.end()), std::ostream_iterator<std::string>(ss, ", "));
ss << names.back();
}
return ss.str();
}
std::string extractTableName(const std::string & nested_name)
{

View File

@ -13,6 +13,8 @@ namespace Nested
std::pair<std::string, std::string> splitName(const std::string & name);
std::string createCommaSeparatedStringFrom(const Names & names);
/// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot.
std::string extractTableName(const std::string & nested_name);

View File

@ -9,6 +9,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDateTime.h>
@ -160,6 +161,39 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
}
}
/// For LowCardinality. This is above Nullable, because LowCardinality can contain Nullable but cannot be inside Nullable.
{
bool have_low_cardinality = false;
bool have_not_low_cardinality = false;
DataTypes nested_types;
nested_types.reserve(types.size());
for (const auto & type : types)
{
if (const DataTypeLowCardinality * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get()))
{
have_low_cardinality = true;
nested_types.emplace_back(type_low_cardinality->getDictionaryType());
}
else
{
have_not_low_cardinality = true;
nested_types.emplace_back(type);
}
}
/// All LowCardinality gives LowCardinality.
/// LowCardinality with high cardinality gives high cardinality.
if (have_low_cardinality)
{
if (have_not_low_cardinality)
return getLeastSupertype(nested_types);
else
return std::make_shared<DataTypeLowCardinality>(getLeastSupertype(nested_types));
}
}
/// For Nullable
{
bool have_nullable = false;

View File

@ -64,18 +64,8 @@ TrieDictionary::TrieDictionary(
{
createAttributes();
trie = btrie_create();
try
{
loadData();
calculateBytesAllocated();
}
catch (...)
{
creation_exception = std::current_exception();
}
creation_time = std::chrono::system_clock::now();
loadData();
calculateBytesAllocated();
}
TrieDictionary::~TrieDictionary()

View File

@ -249,10 +249,6 @@ private:
size_t bucket_count = 0;
mutable std::atomic<size_t> query_count{0};
std::chrono::time_point<std::chrono::system_clock> creation_time;
std::exception_ptr creation_exception;
Poco::Logger * logger;
};

View File

@ -99,6 +99,10 @@ public:
bool isDictGetFunctionInjective(const Block & sample_block)
{
/// Assume non-injective by default
if (!sample_block)
return false;
if (sample_block.columns() != 3 && sample_block.columns() != 4)
throw Exception{"Function dictGet... takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

View File

@ -336,7 +336,7 @@ static void executeForTernaryLogicImpl(ColumnRawPtrs arguments, ColumnWithTypeAn
const bool has_consts = extractConstColumnsAsTernary<Op>(arguments, const_3v_value);
/// If the constant value uniquely determines the result, return it.
if (has_consts && (arguments.empty() || Op::isSaturatedValue(const_3v_value)))
if (has_consts && (arguments.empty() || Op::isSaturatedValueTernary(const_3v_value)))
{
result_info.column = ColumnConst::create(
buildColumnFromTernaryData(UInt8Container({const_3v_value}), result_info.type->isNullable()),
@ -498,7 +498,8 @@ DataTypePtr FunctionAnyArityLogical<Impl, Name>::getReturnTypeImpl(const DataTyp
}
template <typename Impl, typename Name>
void FunctionAnyArityLogical<Impl, Name>::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result_index, size_t input_rows_count)
void FunctionAnyArityLogical<Impl, Name>::executeImpl(
Block & block, const ColumnNumbers & arguments, size_t result_index, size_t input_rows_count)
{
ColumnRawPtrs args_in;
for (const auto arg_index : arguments)

View File

@ -36,9 +36,21 @@ namespace Ternary
{
using ResultType = UInt8;
static constexpr UInt8 False = 0;
static constexpr UInt8 True = -1;
static constexpr UInt8 Null = 1;
/** These carefully picked values magically work so bitwise "and", "or" on them
* corresponds to the expected results in three-valued logic.
*
* False and True are represented by all-0 and all-1 bits, so all bitwise operations on them work as expected.
* Null is represented as single 1 bit. So, it is something in between False and True.
* And "or" works like maximum and "and" works like minimum:
* "or" keeps True as is and lifts False with Null to Null.
* "and" keeps False as is and downs True with Null to Null.
*
* This logic does not apply for "not" and "xor" - they work with default implementation for NULLs:
* anything with NULL returns NULL, otherwise use conventional two-valued logic.
*/
static constexpr UInt8 False = 0; /// All zero bits.
static constexpr UInt8 True = -1; /// All one bits.
static constexpr UInt8 Null = 1; /// Single one bit.
template <typename T>
inline ResultType makeValue(T value)
@ -61,9 +73,16 @@ struct AndImpl
using ResultType = UInt8;
static inline constexpr bool isSaturable() { return true; }
/// Final value in two-valued logic (no further operations with True, False will change this value)
static inline constexpr bool isSaturatedValue(bool a) { return !a; }
/// Final value in three-valued logic (no further operations with True, False, Null will change this value)
static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; }
static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; }
/// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL).
static inline constexpr bool specialImplementationForNulls() { return true; }
};

View File

@ -133,6 +133,10 @@ public:
* But we assume, that it is injective. This could be documented as implementation-specific behaviour.
*
* sample_block should contain data types of arguments and values of constants, if relevant.
* NOTE: to check whether the function is injective for any arguments, you can pass
* an empty block as sample_block (since most of the time the function will
* ignore it anyway, and creating arguments just to check whether the
* function is injective is overkill).
*/
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }

View File

@ -11,6 +11,11 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
/** Replaces values where condition is met with the previous value that have condition not met
* (or with the first value if condition was true for all elements before).
* Looks somewhat similar to arrayFilter, but instead removing elements, it fills gaps with the value of previous element.
*/
template <bool reverse>
struct ArrayFillImpl
{
@ -45,11 +50,11 @@ struct ArrayFillImpl
for (auto in_offset : in_offsets)
{
array_end = in_offset - 1;
array_end = in_offset;
for (; end <= array_end; ++end)
for (; end < array_end; ++end)
{
if (end == array_end || fill[end + 1] != fill[begin])
if (end + 1 == array_end || fill[end + 1] != fill[begin])
{
if (fill[begin])
out_data.insertRangeFrom(in_data, begin, end + 1 - begin);
@ -57,8 +62,8 @@ struct ArrayFillImpl
{
if constexpr (reverse)
{
if (end == array_end)
out_data.insertManyFrom(in_data, array_end, end + 1 - begin);
if (end + 1 == array_end)
out_data.insertManyFrom(in_data, end, end + 1 - begin);
else
out_data.insertManyFrom(in_data, end + 1, end + 1 - begin);
}
@ -75,7 +80,7 @@ struct ArrayFillImpl
}
}
array_begin = array_end + 1;
array_begin = array_end;
}
}
else
@ -88,8 +93,7 @@ struct ArrayFillImpl
if (column_fill_const->getValue<UInt8>())
return ColumnArray::create(
array.getDataPtr(),
array.getOffsetsPtr()
);
array.getOffsetsPtr());
size_t array_begin = 0;
size_t array_end = 0;

View File

@ -251,7 +251,6 @@ FunctionArrayIntersect::CastArgumentsResult FunctionArrayIntersect::castColumns(
}
else
{
if (!arg.type->equals(*return_type) && !arg.type->equals(*nullable_return_type))
{
/// If result has array type Array(T) still cast Array(Nullable(U)) to Array(Nullable(T))

View File

@ -14,6 +14,11 @@
#include <Poco/Net/HTTPResponse.h>
#include <common/logger_useful.h>
namespace DB::ErrorCodes
{
extern const int TOO_MANY_REDIRECTS;
}
namespace DB::S3
{
PocoHTTPClient::PocoHTTPClient(const Aws::Client::ClientConfiguration & clientConfiguration)
@ -153,8 +158,10 @@ void PocoHTTPClient::MakeRequestInternal(
else
response->GetResponseStream().SetUnderlyingStream(std::make_shared<PocoHTTPResponseStream>(session, response_body_stream));
break;
return;
}
throw Exception(String("Too many redirects while trying to access ") + request.GetUri().GetURIString(),
ErrorCodes::TOO_MANY_REDIRECTS);
}
catch (...)
{

View File

@ -1,6 +1,8 @@
#include <unistd.h>
#include <errno.h>
#include <cassert>
#include <sys/types.h>
#include <sys/stat.h>
#include <Common/Exception.h>
#include <Common/ProfileEvents.h>
@ -33,6 +35,7 @@ namespace ErrorCodes
extern const int CANNOT_FSYNC;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int CANNOT_TRUNCATE_FILE;
extern const int CANNOT_FSTAT;
}
@ -130,4 +133,14 @@ void WriteBufferFromFileDescriptor::truncate(off_t length)
throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
}
/// Returns the size of the underlying file as reported by fstat() on `fd`.
/// Throws (CANNOT_FSTAT, built from errno) if fstat() fails.
off_t WriteBufferFromFileDescriptor::size()
{
    struct stat file_stat;

    if (fstat(fd, &file_stat) == -1)
        throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT);

    return file_stat.st_size;
}
}

View File

@ -44,6 +44,8 @@ public:
off_t seek(off_t offset, int whence);
void truncate(off_t length);
off_t size();
};
}

View File

@ -0,0 +1,102 @@
#include <Interpreters/AggregateDescription.h>
#include <Common/FieldVisitors.h>
#include <IO/Operators.h>
namespace DB
{
/// Writes a multi-line, human-readable description of this aggregate into `out` (for EXPLAIN).
/// Every line starts with `indent` spaces. The output consists of the result column name,
/// the function signature (or "nullptr" when no function is set), the parameters (only if
/// there are any), the argument names and the argument positions.
void AggregateDescription::explain(WriteBuffer & out, size_t indent) const
{
String prefix(indent, ' ');
out << prefix << column_name << '\n';
/// Writes the elements of `arr` separated by ", ", each converted with FieldVisitorToString.
auto dump_params = [&](const Array & arr)
{
bool first = true;
for (const auto & param : arr)
{
if (!first)
out << ", ";
first = false;
out << applyVisitor(FieldVisitorToString(), param);
}
};
if (function)
{
/// Double whitespace is intentional.
/// NOTE(review): in this view the literal below starts with a single space although the
/// comment above mentions double whitespace - confirm the literal against the intended layout.
out << prefix << " Function: " << function->getName();
/// Parameters of the function itself, in parentheses, only when present.
const auto & params = function->getParameters();
if (!params.empty())
{
out << "(";
dump_params(params);
out << ")";
}
/// Argument types in parentheses, followed by the return type.
out << "(";
bool first = true;
for (const auto & type : function->getArgumentTypes())
{
if (!first)
out << ", ";
first = false;
out << type->getName();
}
out << ") → " << function->getReturnType()->getName() << "\n";
}
else
out << prefix << " Function: nullptr\n";
/// Parameters stored in the description; the line is omitted when the list is empty.
if (!parameters.empty())
{
out << prefix << " Parameters: ";
dump_params(parameters);
out << '\n';
}
/// Argument names, or "none" when the list is empty.
out << prefix << " Arguments: ";
if (argument_names.empty())
out << "none\n";
else
{
bool first = true;
for (const auto & arg : argument_names)
{
if (!first)
out << ", ";
first = false;
out << arg;
}
out << "\n";
}
/// Argument positions, or "none" when the list is empty.
out << prefix << " Argument positions: ";
if (arguments.empty())
out << "none\n";
else
{
bool first = true;
for (auto arg : arguments)
{
if (!first)
out << ", ";
first = false;
out << arg;
}
out << '\n';
}
}
}

View File

@ -15,6 +15,8 @@ struct AggregateDescription
ColumnNumbers arguments;
Names argument_names; /// used if no `arguments` are specified.
String column_name; /// What name to use for a column with aggregate function values
void explain(WriteBuffer & out, size_t indent) const; /// Get description for EXPLAIN query.
};
using AggregateDescriptions = std::vector<AggregateDescription>;

View File

@ -30,6 +30,7 @@
#include <AggregateFunctions/AggregateFunctionState.h>
#include <AggregateFunctions/AggregateFunctionResample.h>
#include <Disks/StoragePolicy.h>
#include <IO/Operators.h>
namespace ProfileEvents
@ -151,6 +152,42 @@ Block Aggregator::Params::getHeader(
return materializeBlock(res);
}
/// Writes the aggregation keys and, when there are any, every aggregate description to `out` for EXPLAIN.
/// The "Keys:" and "Aggregates:" lines start with `indent` spaces; each aggregate is written with `indent + 4`.
void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const
{
    /// Key positions are looked up in src_header when it is set, otherwise in intermediate_header.
    const auto & header = src_header ? src_header
                                     : intermediate_header;

    String prefix(indent, ' ');

    {
        /// Dump keys.
        out << prefix << "Keys: ";

        bool first = true;
        for (auto key : keys)
        {
            if (!first)
                out << ", ";
            first = false;

            /// A position outside of the header has no name to show, so the raw position is printed.
            if (key >= header.columns())
                out << "unknown position " << key;
            else
                out << header.getByPosition(key).name;
        }

        out << '\n';
    }

    if (!aggregates.empty())
    {
        out << prefix << "Aggregates:\n";

        for (const auto & aggregate : aggregates)
            aggregate.explain(out, indent + 4);
    }
}
Aggregator::Aggregator(const Params & params_)
: params(params_),

View File

@ -923,6 +923,9 @@ public:
{
return getHeader(src_header, intermediate_header, keys, aggregates, final);
}
/// Writes keys and aggregates to `out` for EXPLAIN query.
void explain(WriteBuffer & out, size_t indent) const;
};
Aggregator(const Params & params_);

View File

@ -81,19 +81,9 @@ public:
using TypeToVisit = ASTSelectQuery;
const Context & context;
bool done = false;
void visit(ASTSelectQuery & select_query, ASTPtr &)
{
if (done)
return;
for (const auto & elem : select_query.children)
{
if (elem->as<ASTSetQuery>() && !elem->as<ASTSetQuery>()->is_standalone)
return;
}
if (select_query.orderBy() || select_query.groupBy())
{
for (auto & elem : select_query.children)

View File

@ -10,16 +10,25 @@
#include <Parsers/DumpASTNode.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <IO/WriteBufferFromOStream.h>
#include <Storages/StorageView.h>
#include <sstream>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/printPipeline.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
extern const int INVALID_SETTING_VALUE;
extern const int UNKNOWN_SETTING;
extern const int LOGICAL_ERROR;
}
namespace
{
struct ExplainAnalyzedSyntaxMatcher
@ -79,10 +88,133 @@ Block InterpreterExplainQuery::getSampleBlock()
return block;
}
/// Splits `str` on line feeds and appends every piece to `column` as a separate row.
/// The line feeds themselves are not inserted. A last piece that is not terminated by a line feed
/// is appended as well; an empty `str` appends nothing.
static void fillColumn(IColumn & column, const std::string & str)
{
    const char * const data = str.data();
    const size_t size = str.size();

    size_t line_begin = 0;
    while (line_begin < size)
    {
        const size_t line_end = str.find('\n', line_begin);

        if (line_end == std::string::npos)
        {
            /// No more line feeds: the rest of the string is the last row.
            column.insertData(data + line_begin, size - line_begin);
            break;
        }

        column.insertData(data + line_begin, line_end - line_begin);
        line_begin = line_end + 1;
    }
}
namespace
{
/// Settings. Different for each explain type.
/// Settings accepted by the EXPLAIN PLAN query.
/// `boolean_settings` maps every supported setting name to the flag it controls inside this object.
/// NOTE(review): the references are bound by the default member initializer, so a copied or moved
/// object presumably keeps referring to the flags of the object it was copied from — confirm before
/// using the map of a copy.
struct QueryPlanSettings
{
/// Options passed to QueryPlan::explainPlan.
QueryPlan::ExplainPlanOptions query_plan_options;
/// Explain kind name inserted into the "unknown setting" error message.
constexpr static char name[] = "PLAN";
/// Setting name -> reference to the corresponding flag of `query_plan_options`.
std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"header", query_plan_options.header},
{"description", query_plan_options.description},
{"actions", query_plan_options.actions}
};
};
/// Settings accepted by the EXPLAIN PIPELINE query.
/// `boolean_settings` maps every supported setting name to the flag it controls inside this object.
/// NOTE(review): the references are bound by the default member initializer, so a copied or moved
/// object presumably keeps referring to the flags of the object it was copied from — confirm before
/// using the map of a copy.
struct QueryPipelineSettings
{
/// Options passed to QueryPlan::explainPipeline; its `header` flag is also passed to printPipelineCompact.
QueryPlan::ExplainPipelineOptions query_pipeline_options;
/// When set, the processors of the built pipeline are printed with printPipeline / printPipelineCompact
/// instead of calling QueryPlan::explainPipeline.
bool graph = false;
/// Only consulted when `graph` is set: selects printPipelineCompact over printPipeline.
bool compact = true;
/// Explain kind name inserted into the "unknown setting" error message.
constexpr static char name[] = "PIPELINE";
/// Setting name -> reference to the corresponding flag of this object.
std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"header", query_pipeline_options.header},
{"graph", graph},
{"compact", compact},
};
};
/// Adds access by name to the boolean settings declared by `Settings`.
/// `Settings` has to provide a `boolean_settings` map from setting name to a reference to bool.
template <typename Settings>
struct ExplainSettings : public Settings
{
    using Settings::boolean_settings;

    /// Checks whether a setting called `name_` is supported.
    bool has(const std::string & name_) const
    {
        return boolean_settings.find(name_) != boolean_settings.end();
    }

    /// Assigns `value` to the setting called `name_`.
    /// Throws LOGICAL_ERROR for an unknown name: callers are expected to check has() first.
    void setBooleanSetting(const std::string & name_, bool value)
    {
        auto found = boolean_settings.find(name_);
        if (found != boolean_settings.end())
        {
            found->second.get() = value;
            return;
        }

        throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR);
    }

    /// Returns the supported setting names joined with ", " (in the iteration order of the map).
    std::string getSettingsList() const
    {
        std::string list;
        for (auto it = boolean_settings.begin(); it != boolean_settings.end(); ++it)
        {
            if (!list.empty())
                list.append(", ");

            list.append(it->first);
        }

        return list;
    }
};
/// Builds the settings object of one EXPLAIN kind from the optional settings AST.
///
/// `ast_settings` may be null; the default settings are returned then. Otherwise it must be an ASTSetQuery
/// and every change in it is validated and applied:
///  - an unsupported name throws UNKNOWN_SETTING (the message lists the supported names);
///  - a value that is not UInt64, or is greater than 1, throws INVALID_SETTING_VALUE.
template <typename Settings>
ExplainSettings<Settings> checkAndGetSettings(const ASTPtr & ast_settings)
{
    /// The result is one named object returned from every path. `boolean_settings` stores references
    /// to members of the object it was created in, so a move-constructed result would carry a map
    /// that refers to this (already destroyed) local. Returning the same variable everywhere lets
    /// compilers construct it directly in the caller's storage (NRVO).
    /// NOTE(review): NRVO is permitted, not guaranteed, by the standard — do not rely on the map of a copy.
    ExplainSettings<Settings> settings;

    if (!ast_settings)
        return settings;

    const auto & set_query = ast_settings->as<ASTSetQuery &>();

    for (const auto & change : set_query.changes)
    {
        if (!settings.has(change.name))
            throw Exception("Unknown setting \"" + change.name + "\" for EXPLAIN " + Settings::name + " query. "
                            "Supported settings: " + settings.getSettingsList(), ErrorCodes::UNKNOWN_SETTING);

        if (change.value.getType() != Field::Types::UInt64)
            throw Exception("Invalid type " + std::string(change.value.getTypeName()) + " for setting \"" + change.name +
                            "\" only boolean settings are supported", ErrorCodes::INVALID_SETTING_VALUE);

        /// Booleans are written as 0 or 1.
        auto value = change.value.get<UInt64>();
        if (value > 1)
            throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name +
                            "\". Only boolean settings are supported", ErrorCodes::INVALID_SETTING_VALUE);

        settings.setBooleanSetting(change.name, value != 0);
    }

    return settings;
}
}
BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
{
const auto & ast = query->as<ASTExplainQuery &>();
Block sample_block = getSampleBlock();
MutableColumns res_columns = sample_block.cloneEmptyColumns();
@ -90,17 +222,63 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
if (ast.getKind() == ASTExplainQuery::ParsedAST)
{
dumpAST(ast, ss);
if (ast.getSettings())
throw Exception("Settings are not supported for EXPLAIN AST query.", ErrorCodes::UNKNOWN_SETTING);
dumpAST(*ast.getExplainedQuery(), ss);
}
else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
{
if (ast.getSettings())
throw Exception("Settings are not supported for EXPLAIN SYNTAX query.", ErrorCodes::UNKNOWN_SETTING);
ExplainAnalyzedSyntaxVisitor::Data data{.context = context};
ExplainAnalyzedSyntaxVisitor(data).visit(query);
ast.children.at(0)->format(IAST::FormatSettings(ss, false));
ast.getExplainedQuery()->format(IAST::FormatSettings(ss, false));
}
else if (ast.getKind() == ASTExplainQuery::QueryPlan)
{
if (!dynamic_cast<const ASTSelectWithUnionQuery *>(ast.getExplainedQuery().get()))
throw Exception("Only SELECT is supported for EXPLAIN query", ErrorCodes::INCORRECT_QUERY);
auto settings = checkAndGetSettings<QueryPlanSettings>(ast.getSettings());
QueryPlan plan;
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions());
interpreter.buildQueryPlan(plan);
WriteBufferFromOStream buffer(ss);
plan.explainPlan(buffer, settings.query_plan_options);
}
else if (ast.getKind() == ASTExplainQuery::QueryPipeline)
{
if (!dynamic_cast<const ASTSelectWithUnionQuery *>(ast.getExplainedQuery().get()))
throw Exception("Only SELECT is supported for EXPLAIN query", ErrorCodes::INCORRECT_QUERY);
auto settings = checkAndGetSettings<QueryPipelineSettings>(ast.getSettings());
QueryPlan plan;
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), context, SelectQueryOptions());
interpreter.buildQueryPlan(plan);
auto pipeline = plan.buildQueryPipeline();
WriteBufferFromOStream buffer(ss);
if (settings.graph)
{
if (settings.compact)
printPipelineCompact(pipeline->getProcessors(), buffer, settings.query_pipeline_options.header);
else
printPipeline(pipeline->getProcessors(), buffer);
}
else
{
plan.explainPipeline(buffer, settings.query_pipeline_options);
}
}
res_columns[0]->insert(ss.str());
fillColumn(*res_columns[0], ss.str());
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
}

View File

@ -54,7 +54,7 @@
#include <Processors/QueryPlan/CubeStep.h>
#include <Processors/QueryPlan/FillingStep.h>
#include <Processors/QueryPlan/ExtremesStep.h>
#include <Processors/QueryPlan/OffsetsStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
#include <Processors/QueryPlan/FinishSortingStep.h>
#include <Processors/QueryPlan/QueryPlan.h>
@ -962,7 +962,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
*/
if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
executeMergeSorted(query_plan, "before ORDER BY");
executeMergeSorted(query_plan, "for ORDER BY");
else /// Otherwise, just sort.
executeOrder(query_plan, query_info.input_order_info);
}
@ -1589,7 +1589,7 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
limit,
SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode));
partial_sorting->setStepDescription("Sort each block before ORDER BY");
partial_sorting->setStepDescription("Sort each block for ORDER BY");
query_plan.addStep(std::move(partial_sorting));
/// Merge the sorted blocks.
@ -1600,11 +1600,11 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
settings.max_bytes_before_external_sort, context->getTemporaryVolume(),
settings.min_free_disk_space_for_temporary_data);
merge_sorting_step->setStepDescription("Merge sorted blocks before ORDER BY");
merge_sorting_step->setStepDescription("Merge sorted blocks for ORDER BY");
query_plan.addStep(std::move(merge_sorting_step));
/// If there are several streams, we merge them into one
executeMergeSorted(query_plan, output_order_descr, limit, "before ORDER BY");
executeMergeSorted(query_plan, output_order_descr, limit, "for ORDER BY");
}
@ -1785,7 +1785,7 @@ void InterpreterSelectQuery::executeOffset(QueryPlan & query_plan)
UInt64 limit_offset;
std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, *context);
auto offsets_step = std::make_unique<OffsetsStep>(query_plan.getCurrentDataStream(), limit_offset);
auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), limit_offset);
query_plan.addStep(std::move(offsets_step));
}
}

View File

@ -248,6 +248,7 @@ void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t sub
const std::unordered_set<String> possibly_injective_function_names
{
"dictGet",
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
@ -327,10 +328,18 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
continue;
}
const auto & dict_name = function->arguments->children[0]->as<ASTLiteral &>().value.safeGet<String>();
const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(dict_name);
const auto & attr_name = function->arguments->children[1]->as<ASTLiteral &>().value.safeGet<String>();
const auto * dict_name_ast = function->arguments->children[0]->as<ASTLiteral>();
const auto * attr_name_ast = function->arguments->children[1]->as<ASTLiteral>();
if (!dict_name_ast || !attr_name_ast)
{
++i;
continue;
}
const auto & dict_name = dict_name_ast->value.safeGet<String>();
const auto & attr_name = attr_name_ast->value.safeGet<String>();
const auto & dict_ptr = context.getExternalDictionariesLoader().getDictionary(dict_name);
if (!dict_ptr->isInjective(attr_name))
{
++i;
@ -512,7 +521,7 @@ void optimizeOrderBy(const ASTSelectQuery * select_query)
/// Optimize duplicate ORDER BY and DISTINCT
void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, const Context & context)
{
DuplicateOrderByVisitor::Data order_by_data{context, false};
DuplicateOrderByVisitor::Data order_by_data{context};
DuplicateOrderByVisitor(order_by_data).visit(query);
DuplicateDistinctVisitor::Data distinct_data{};
DuplicateDistinctVisitor(distinct_data).visit(query);
@ -569,12 +578,12 @@ void optimizeUsing(const ASTSelectQuery * select_query)
expression_list = uniq_expressions_list;
}
void optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_miltiif)
void optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif)
{
/// Optimize `if` with a constant condition after constants were substituted in place of scalar subqueries.
OptimizeIfWithConstantConditionVisitor(aliases).visit(query);
if (if_chain_to_miltiif)
if (if_chain_to_multiif)
OptimizeIfChainsVisitor().visit(query);
}
@ -969,7 +978,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
executeScalarSubqueries(query, context, subquery_depth, result.scalars, select_options.only_analyze);
{
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif);
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
/// Move arithmetic operations out of aggregation functions
if (settings.optimize_arithmetic_operations_in_aggregate_functions)
@ -1046,7 +1055,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Executing scalar subqueries. Column defaults could be a scalar subquery.
executeScalarSubqueries(query, context, 0, result.scalars, false);
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif);
optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
if (allow_aggregations)
{

View File

@ -18,6 +18,7 @@ SRCS(
ActionsVisitor.cpp
addMissingDefaults.cpp
addTypeConversionToAST.cpp
AggregateDescription.cpp
Aggregator.cpp
AnyInputOptimize.cpp
ArithmeticOperationsInAgrFuncOptimize.cpp

View File

@ -1,6 +1,6 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOutput.h>
namespace DB
@ -8,45 +8,78 @@ namespace DB
/// AST, EXPLAIN or another query whose purpose is to explain a query instead of executing it
class ASTExplainQuery : public IAST
class ASTExplainQuery : public ASTQueryWithOutput
{
public:
enum ExplainKind
{
ParsedAST,
AnalyzedSyntax,
ParsedAST, /// 'EXPLAIN AST SELECT ...'
AnalyzedSyntax, /// 'EXPLAIN SYNTAX SELECT ...'
QueryPlan, /// 'EXPLAIN SELECT ...'
QueryPipeline, /// 'EXPLAIN PIPELINE ...'
};
ASTExplainQuery(ExplainKind kind_)
: kind(kind_)
{}
ASTExplainQuery(ExplainKind kind_, bool old_syntax_)
: kind(kind_), old_syntax(old_syntax_)
{
}
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
String getID(char delim) const override { return "Explain" + (delim + toString(kind, old_syntax)); }
ExplainKind getKind() const { return kind; }
ASTPtr clone() const override
{
auto res = std::make_shared<ASTExplainQuery>(*this);
res->children.clear();
res->children.push_back(children[0]->clone());
cloneOutputOptions(*res);
return res;
}
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
void setExplainedQuery(ASTPtr query_)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : "") << " ";
children.at(0)->formatImpl(settings, state, frame);
children.emplace_back(query_);
query = std::move(query_);
}
void setSettings(ASTPtr settings_)
{
children.emplace_back(settings_);
ast_settings = std::move(settings_);
}
const ASTPtr & getExplainedQuery() const { return query; }
const ASTPtr & getSettings() const { return ast_settings; }
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind, old_syntax) << (settings.hilite ? hilite_none : "");
if (ast_settings)
{
settings.ostr << ' ';
ast_settings->formatImpl(settings, state, frame);
}
settings.ostr << settings.nl_or_ws;
query->formatImpl(settings, state, frame);
}
private:
ExplainKind kind;
bool old_syntax; /// "EXPLAIN AST" -> "AST", "EXPLAIN SYNTAX" -> "ANALYZE"
static String toString(ExplainKind kind)
ASTPtr query;
ASTPtr ast_settings;
static String toString(ExplainKind kind, bool old_syntax)
{
switch (kind)
{
case ParsedAST: return "AST";
case AnalyzedSyntax: return "ANALYZE";
case ParsedAST: return old_syntax ? "AST" : "EXPLAIN AST";
case AnalyzedSyntax: return old_syntax ? "ANALYZE" : "EXPLAIN SYNTAX";
case QueryPlan: return "EXPLAIN";
case QueryPipeline: return "EXPLAIN PIPELINE";
}
__builtin_unreachable();

View File

@ -0,0 +1,72 @@
#include <Parsers/ParserExplainQuery.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ParserSetQuery.h>
namespace DB
{
/// Parses an explain query into ASTExplainQuery:
///     (AST | ANALYZE | EXPLAIN [AST | SYNTAX | PIPELINE | PLAN]) [settings] <select with union>
/// The leading AST and ANALYZE forms (old syntax) are tried only when `enable_debug_queries` is set.
/// Plain EXPLAIN without a kind keyword means EXPLAIN PLAN.
/// Returns false when the introducing keyword or the SELECT query cannot be parsed.
bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    using Kind = ASTExplainQuery::ExplainKind;

    ParserKeyword ast_keyword("AST");
    ParserKeyword analyze_keyword("ANALYZE");
    ParserKeyword explain_keyword("EXPLAIN");
    ParserKeyword syntax_keyword("SYNTAX");
    ParserKeyword pipeline_keyword("PIPELINE");
    ParserKeyword plan_keyword("PLAN");

    /// QueryPlan is what a bare EXPLAIN produces; every other branch overrides it.
    Kind kind = Kind::QueryPlan;
    bool old_syntax = false;

    if (enable_debug_queries && ast_keyword.ignore(pos, expected))
    {
        kind = Kind::ParsedAST;
        old_syntax = true;
    }
    else if (enable_debug_queries && analyze_keyword.ignore(pos, expected))
    {
        kind = Kind::AnalyzedSyntax;
        old_syntax = true;
    }
    else if (explain_keyword.ignore(pos, expected))
    {
        /// Optional kind keyword after EXPLAIN.
        if (ast_keyword.ignore(pos, expected))
            kind = Kind::ParsedAST;
        else if (syntax_keyword.ignore(pos, expected))
            kind = Kind::AnalyzedSyntax;
        else if (pipeline_keyword.ignore(pos, expected))
            kind = Kind::QueryPipeline;
        else if (plan_keyword.ignore(pos, expected))
            kind = Kind::QueryPlan;
    }
    else
    {
        return false;
    }

    auto explain_query = std::make_shared<ASTExplainQuery>(kind, old_syntax);

    /// Optional settings. If they cannot be parsed, rewind so that the SELECT parser
    /// starts from the token where the settings attempt started.
    const auto before_settings = pos;
    ASTPtr settings_ast;
    ParserSetQuery settings_parser(true);
    if (settings_parser.parse(pos, settings_ast, expected))
        explain_query->setSettings(std::move(settings_ast));
    else
        pos = before_settings;

    /// The explained query itself is mandatory.
    ASTPtr explained_query;
    ParserSelectWithUnionQuery select_parser;
    if (!select_parser.parse(pos, explained_query, expected))
        return false;

    explain_query->setExplainedQuery(std::move(explained_query));
    node = std::move(explain_query);
    return true;
}
}

View File

@ -0,0 +1,25 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
/// Parser of explain queries (see parseImpl for the accepted forms); produces ASTExplainQuery.
class ParserExplainQuery : public IParserBase
{
    /// When set, the old forms that start directly with AST or ANALYZE are accepted too.
    bool enable_debug_queries;

public:
    explicit ParserExplainQuery(bool enable_debug_queries_ = false) : enable_debug_queries(enable_debug_queries_) {}

protected:
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;

    const char * getName() const override
    {
        return "EXPLAIN";
    }
};
}

View File

@ -19,6 +19,7 @@
#include <Parsers/ParserShowCreateAccessEntityQuery.h>
#include <Parsers/ParserShowGrantsQuery.h>
#include <Parsers/ParserShowPrivilegesQuery.h>
#include <Parsers/ParserExplainQuery.h>
namespace DB
@ -44,21 +45,13 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
ParserShowCreateAccessEntityQuery show_create_access_entity_p;
ParserShowGrantsQuery show_grants_p;
ParserShowPrivilegesQuery show_privileges_p;
ParserExplainQuery explain_p(enable_debug_queries);
ASTPtr query;
ParserKeyword s_ast("AST");
ParserKeyword s_analyze("ANALYZE");
bool explain_ast = false;
bool analyze_syntax = false;
if (enable_explain && s_ast.ignore(pos, expected))
explain_ast = true;
if (enable_explain && s_analyze.ignore(pos, expected))
analyze_syntax = true;
bool parsed = select_p.parse(pos, query, expected)
bool parsed =
explain_p.parse(pos, query, expected)
|| select_p.parse(pos, query, expected)
|| show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p`
|| show_tables_p.parse(pos, query, expected)
|| table_p.parse(pos, query, expected)
@ -116,19 +109,17 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
query_with_output.children.push_back(query_with_output.settings_ast);
}
if (explain_ast)
if (auto * ast = query->as<ASTExplainQuery>())
{
node = std::make_shared<ASTExplainQuery>(ASTExplainQuery::ParsedAST);
node->children.push_back(query);
/// Set default format TSV, because output is a single string column.
if (!ast->format)
{
ast->format = std::make_shared<ASTIdentifier>("TSV");
ast->children.push_back(ast->format);
}
}
else if (analyze_syntax)
{
node = std::make_shared<ASTExplainQuery>(ASTExplainQuery::AnalyzedSyntax);
node->children.push_back(query);
}
else
node = query;
node = std::move(query);
return true;
}

View File

@ -11,8 +11,9 @@ namespace DB
class ParserQueryWithOutput : public IParserBase
{
public:
ParserQueryWithOutput(bool enable_explain_ = false)
: enable_explain(enable_explain_)
/// enable_debug_queries flag enables queries 'AST SELECT' and 'ANALYZE SELECT'
explicit ParserQueryWithOutput(bool enable_debug_queries_ = false)
: enable_debug_queries(enable_debug_queries_)
{}
protected:
@ -21,7 +22,7 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
private:
bool enable_explain;
bool enable_debug_queries;
};
}

View File

@ -225,6 +225,15 @@ ASTPtr tryParseQuery(
|| token_iterator->type == TokenType::Semicolon)
{
out_error_message = "Empty query";
// Token iterator skips over comments, so we'll get this error for queries
// like this:
// "
// -- just a comment
// ;
//"
// Advance the position, so that we can use this parser for stream parsing
// even in presence of such queries.
pos = token_iterator->begin;
return nullptr;
}

View File

@ -84,6 +84,7 @@ SRCS(
ParserDictionaryAttributeDeclaration.cpp
ParserDropAccessEntityQuery.cpp
ParserDropQuery.cpp
ParserExplainQuery.cpp
ParserGrantQuery.cpp
ParserInsertQuery.cpp
ParserKillQueryQuery.cpp

View File

@ -24,10 +24,9 @@ arrow::Status ArrowBufferedOutputStream::Close()
return arrow::Status::OK();
}
arrow::Status ArrowBufferedOutputStream::Tell(int64_t * position) const
arrow::Result<int64_t> ArrowBufferedOutputStream::Tell() const
{
*position = total_length;
return arrow::Status::OK();
return arrow::Result<int64_t>(total_length);
}
arrow::Status ArrowBufferedOutputStream::Write(const void * data, int64_t length)
@ -42,10 +41,9 @@ RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(S
{
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::GetSize(int64_t * size)
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::GetSize()
{
*size = file_size;
return arrow::Status::OK();
return arrow::Result<int64_t>(file_size);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
@ -54,25 +52,25 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
return arrow::Status::OK();
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Tell(int64_t * position) const
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Tell() const
{
*position = in.getPosition();
return arrow::Status::OK();
return arrow::Result<int64_t>(in.getPosition());
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, int64_t * bytes_read, void * out)
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out)
{
*bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Status::OK();
int64_t bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Result<int64_t>(bytes_read);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, std::shared_ptr<arrow::Buffer> * out)
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
std::shared_ptr<arrow::Buffer> buf;
ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(nbytes, &buf));
size_t n = in.readBig(reinterpret_cast<char *>(buf->mutable_data()), nbytes);
*out = arrow::SliceBuffer(buf, 0, n);
return arrow::Status::OK();
auto read_buffer = arrow::SliceBuffer(buf, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(read_buffer);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)

Some files were not shown because too many files have changed in this diff Show More