diff --git a/.gitmodules b/.gitmodules index f148e937e24..56856bea6a0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -76,6 +76,9 @@ [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git +[submodule "contrib/h3"] + path = contrib/h3 + url = https://github.com/uber/h3 [submodule "contrib/hyperscan"] path = contrib/hyperscan url = https://github.com/ClickHouse-Extras/hyperscan.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 7102333c937..0273abec108 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -297,6 +297,24 @@ if (USE_INCLUDE_WHAT_YOU_USE) endif() endif () +# Using clang-tidy static analyzer http://mariobadr.com/using-clang-tidy-with-cmake-36.html https://cmake.org/cmake/help/v3.6/prop_tgt/LANG_CLANG_TIDY.html +option (ENABLE_CLANG_TIDY "Use 'clang-tidy' static analyzer" OFF) +if (ENABLE_CLANG_TIDY) + if (${CMAKE_VERSION} VERSION_LESS "3.6.0") + message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") + endif() + find_program (CLANG_TIDY_EXE NAMES "clang-tidy" DOC "Path to clang-tidy executable") + if (NOT CLANG_TIDY_EXE) + set (USE_CLANG_TIDY 0) + message (STATUS "clang-tidy not found.") + else () + set (USE_CLANG_TIDY 1) + message (STATUS "clang-tidy found: ${CLANG_TIDY_EXE}") + set (DO_CLANG_TIDY "${CLANG_TIDY_EXE}" "-checks=*,-clang-analyzer-alpha.*") + # You can enable it within a directory by: set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") + endif () +endif () + if (ENABLE_TESTS) message (STATUS "Tests are enabled") endif () @@ -347,6 +365,7 @@ include (cmake/find_libgsasl.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) +include (cmake/find_h3.cmake) include (cmake/find_cpuid.cmake) # Freebsd, bundled if (NOT USE_CPUID) include (cmake/find_cpuinfo.cmake) # Debian @@ -427,6 +446,7 @@ if (GLIBC_COMPATIBILITY OR USE_INTERNAL_UNWIND_LIBRARY_FOR_EXCEPTION_HANDLING) add_default_dependencies(kj) add_default_dependencies(simdjson) add_default_dependencies(apple_rt) + add_default_dependencies(h3) add_default_dependencies(re2) add_default_dependencies(re2_st) add_default_dependencies(hs_compile_shared) diff --git a/ci/install-os-packages.sh b/ci/install-os-packages.sh index fe5b4f84833..38fa6dbba15 100755 --- a/ci/install-os-packages.sh +++ b/ci/install-os-packages.sh @@ -7,9 +7,9 @@ WHAT=$1 [[ $EUID -ne 0 ]] && SUDO=sudo -command -v apt-get && PACKAGE_MANAGER=apt command -v yum && PACKAGE_MANAGER=yum command -v pkg && PACKAGE_MANAGER=pkg +command -v apt-get && PACKAGE_MANAGER=apt case $PACKAGE_MANAGER in diff --git a/cmake/find_h3.cmake b/cmake/find_h3.cmake new file mode 100644 index 00000000000..802f5aff05e --- /dev/null +++ b/cmake/find_h3.cmake @@ -0,0 +1,19 @@ +option (USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of bundled" ${NOT_UNBUNDLED}) + +set (H3_INCLUDE_PATHS /usr/local/include/h3) + +if (USE_INTERNAL_H3_LIBRARY) + set (H3_LIBRARY h3) + set (H3_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include) +else () + find_library (H3_LIBRARY h3) + find_path (H3_INCLUDE_DIR NAMES h3api.h PATHS ${H3_INCLUDE_PATHS}) +endif () + +if (H3_LIBRARY AND H3_INCLUDE_DIR) + set (USE_H3 1) +else () + set (USE_H3 0) +endif () + +message (STATUS "Using h3=${USE_H3}: ${H3_INCLUDE_DIR} : ${H3_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index eb3712f4c2b..ba75615aadc 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -106,6 +106,10 @@ if (USE_INTERNAL_CPUID_LIBRARY) add_subdirectory (libcpuid) endif () 
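Note on the new dependency: contrib/h3 brings in Uber's H3 geo-indexing library (v3 C API), and find_h3.cmake exposes it through USE_H3 / H3_LIBRARY / H3_INCLUDE_DIR. For orientation, a minimal consumer of the bundled API; GeoCoord, degsToRads and geoToH3 are real H3 v3 names, while the program around them is purely illustrative:

```cpp
#include <h3api.h>   // from contrib/h3 (or the system package when USE_INTERNAL_H3_LIBRARY is off)
#include <cstdio>

int main()
{
    GeoCoord location;                     // H3 takes coordinates in radians
    location.lat = degsToRads(55.7539);
    location.lon = degsToRads(37.6208);

    int resolution = 8;                    // 0 = coarsest, 15 = finest
    H3Index index = geoToH3(&location, resolution);

    std::printf("%llx\n", static_cast<unsigned long long>(index));
}
```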
+if (USE_INTERNAL_H3_LIBRARY) + add_subdirectory(h3-cmake) +endif () + if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) diff --git a/contrib/h3 b/contrib/h3 new file mode 160000 index 00000000000..6cfd649e8c0 --- /dev/null +++ b/contrib/h3 @@ -0,0 +1 @@ +Subproject commit 6cfd649e8c0d3ed913e8aae928a669fc3b8a2365 diff --git a/contrib/h3-cmake/CMakeLists.txt b/contrib/h3-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5df0a205a34 --- /dev/null +++ b/contrib/h3-cmake/CMakeLists.txt @@ -0,0 +1,27 @@ +set(H3_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib) +set(H3_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/h3/src/h3lib) + +set(SRCS +${H3_SOURCE_DIR}/lib/algos.c +${H3_SOURCE_DIR}/lib/baseCells.c +${H3_SOURCE_DIR}/lib/bbox.c +${H3_SOURCE_DIR}/lib/coordijk.c +${H3_SOURCE_DIR}/lib/faceijk.c +${H3_SOURCE_DIR}/lib/geoCoord.c +${H3_SOURCE_DIR}/lib/h3Index.c +${H3_SOURCE_DIR}/lib/h3UniEdge.c +${H3_SOURCE_DIR}/lib/linkedGeo.c +${H3_SOURCE_DIR}/lib/localij.c +${H3_SOURCE_DIR}/lib/mathExtensions.c +${H3_SOURCE_DIR}/lib/polygon.c +${H3_SOURCE_DIR}/lib/vec2d.c +${H3_SOURCE_DIR}/lib/vec3d.c +${H3_SOURCE_DIR}/lib/vertexGraph.c +) + +configure_file(${H3_SOURCE_DIR}/include/h3api.h.in ${H3_BINARY_DIR}/include/h3api.h) + +add_library(h3 ${SRCS}) +target_include_directories(h3 SYSTEM PUBLIC ${H3_SOURCE_DIR}/include) +target_include_directories(h3 SYSTEM PUBLIC ${H3_BINARY_DIR}/include) +target_compile_definitions(h3 PRIVATE H3_HAVE_VLA) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index 55fbf646589..d9bc760ee3f 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -15,9 +15,14 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) + LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR) - + if (GCC_COMPILER_VERSION_LENGTH GREATER 1) + LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) + else () + set (GCC_COMPILER_VERSION_MINOR 0) + endif () + SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 4b47b77dec2..18c169211d9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -2,6 +2,10 @@ if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) endif () +if (USE_CLANG_TIDY) + set (CMAKE_CXX_CLANG_TIDY "${DO_CLANG_TIDY}") +endif () + if(COMPILER_PIPE) set(MAX_COMPILER_MEMORY 2500) else() diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 69cdf5b1355..2da1c4a987d 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -202,6 +203,9 @@ private: /// External tables info. std::list external_tables; + /// Dictionary with query parameters for prepared statements. + NameToNameMap query_parameters; + ConnectionParameters connection_parameters; void initialize(Poco::Util::Application & self) @@ -795,7 +799,6 @@ private: /// Some parts of a query (result output and formatting) are executed client-side. /// Thus we need to parse the query. 
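An aside before the next hunks: the feature they implement lets a query carry typed placeholders such as {id:UInt64}, with values supplied out of band. The real code substitutes them by walking the AST with ReplaceQueryParameterVisitor and re-serializing; the sketch below is only a string-level illustration of the same idea, with every name hypothetical:

```cpp
#include <map>
#include <stdexcept>
#include <string>

using NameToNameMap = std::map<std::string, std::string>;

// Toy stand-in for the AST-based substitution: replace each "{name:Type}"
// placeholder with the user-supplied value. The real visitor validates the
// value against the declared type and emits a proper, escaped literal.
std::string substituteParameters(const std::string & query, const NameToNameMap & params)
{
    std::string result;
    size_t pos = 0;
    while (true)
    {
        size_t open = query.find('{', pos);
        if (open == std::string::npos)
            return result + query.substr(pos);

        size_t colon = query.find(':', open);
        size_t close = query.find('}', open);
        if (colon == std::string::npos || close == std::string::npos || colon > close)
            throw std::runtime_error("Malformed query parameter placeholder");

        result += query.substr(pos, open - pos);
        const std::string name = query.substr(open + 1, colon - open - 1);
        auto it = params.find(name);
        if (it == params.end())
            throw std::runtime_error("No value provided for query parameter " + name);

        result += '\'' + it->second + '\'';   // real code escapes and type-checks
        pos = close + 1;
    }
}
```

With the command-line handling added below, clickhouse-client --param_id=42 -q "SELECT {id:UInt64}" would feed {"id", "42"} into such a map.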
parsed_query = parsed_query_; - if (!parsed_query) { const char * begin = query.data(); @@ -900,6 +903,16 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { + /// We will always rewrite the query (even if there are no query_parameters) because it will help to find errors in the query formatter. + { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + ReplaceQueryParameterVisitor visitor(query_parameters); + visitor.visit(parsed_query); + + /// Get the new query after substitutions. Note that it cannot be done for an INSERT query with embedded data. + query = serializeAST(*parsed_query); + } + connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); receiveResult(); @@ -1548,7 +1561,8 @@ public: /** We allow different groups of arguments: * - common arguments; * - arguments for any number of external tables each in form "--external args...", - * where possible args are file, name, format, structure, types. + * where possible args are file, name, format, structure, types; + * - param arguments for prepared statements. * Split these groups before processing. */ using Arguments = std::vector; @@ -1597,7 +1611,31 @@ public: else { in_external_group = false; - common_arguments.emplace_back(arg); + + /// Parameter arg after underscore. + if (startsWith(arg, "--param_")) + { + const char * param_continuation = arg + strlen("--param_"); + const char * equal_pos = strchr(param_continuation, '='); + + if (equal_pos == param_continuation) + throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + + if (equal_pos) + { + /// param_name=value + query_parameters.emplace(String(param_continuation, equal_pos), String(equal_pos + 1)); + } + else + { + /// param_name value + ++arg_num; + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + } + else + common_arguments.emplace_back(arg); } } @@ -1672,6 +1710,7 @@ public: ("structure", po::value(), "structure") ("types", po::value(), "types") ; + /// Parse main command line options. po::parsed_options parsed = po::command_line_parser(common_arguments).options(main_description).run(); po::variables_map options; @@ -1696,6 +1735,7 @@ public: { std::cout << main_description << "\n"; std::cout << external_description << "\n"; + std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; exit(0); } diff --git a/dbms/programs/client/readpassphrase/readpassphrase.h b/dbms/programs/client/readpassphrase/readpassphrase.h index d504cff5f00..272c822423a 100644 --- a/dbms/programs/client/readpassphrase/readpassphrase.h +++ b/dbms/programs/client/readpassphrase/readpassphrase.h @@ -29,6 +29,11 @@ //#include "includes.h" #include "config_client.h" +// Should not be included on BSD systems, but if it happens...
+#ifdef HAVE_READPASSPHRASE +# include_next +#endif + #ifndef HAVE_READPASSPHRASE # ifdef __cplusplus diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 048f175160b..2349ab337f0 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -475,9 +475,9 @@ void HTTPHandler::processQuery( settings.readonly = 2; } - bool isExternalData = startsWith(request.getContentType().data(), "multipart/form-data"); + bool has_external_data = startsWith(request.getContentType().data(), "multipart/form-data"); - if (isExternalData) + if (has_external_data) { /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. reserved_param_suffixes.reserve(3); @@ -501,6 +501,12 @@ void HTTPHandler::processQuery( else if (param_could_be_skipped(key)) { } + else if (startsWith(key, "param_")) + { + /// Save name and values of substitution in dictionary. + const String parameter_name = key.substr(strlen("param_")); + context.setQueryParameter(parameter_name, value); + } else { /// All other query parameters are treated as settings. @@ -516,7 +522,7 @@ void HTTPHandler::processQuery( std::string full_query; /// Support for "external data for query processing". - if (isExternalData) + if (has_external_data) { ExternalTablesHandler handler(context, params); params.load(request, istr, handler); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 017b6d113dc..80860fdb62a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -47,8 +47,7 @@ struct AggregateFunctionSequenceMatchData final using Comparator = ComparePairFirst; bool sorted = true; - static constexpr size_t bytes_in_arena = 64; - PODArray, bytes_in_arena>> events_list; + PODArrayWithStackMemory events_list; void add(const Timestamp timestamp, const Events & events) { @@ -203,8 +202,7 @@ private: PatternAction(const PatternActionType type, const std::uint64_t extra = 0) : type{type}, extra{extra} {} }; - static constexpr size_t bytes_on_stack = 64; - using PatternActions = PODArray, bytes_on_stack>>; + using PatternActions = PODArrayWithStackMemory; Derived & derived() { return static_cast(*this); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index c74ad8c0bdb..5e2a9b15f4e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -68,9 +68,8 @@ struct AggregateFunctionTimeSeriesGroupSumData } }; - static constexpr size_t bytes_on_stack = 128; typedef std::map Series; - typedef PODArray, bytes_on_stack>> AggSeries; + typedef PODArrayWithStackMemory AggSeries; Series ss; AggSeries result; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 9a738d3fefb..1e3c005f73f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -35,10 +35,7 @@ template struct AggregateFunctionWindowFunnelData { using TimestampEvent = std::pair; - - static constexpr size_t bytes_on_stack = 64; - using TimestampEvents = PODArray, bytes_on_stack>>; - + using TimestampEvents = PODArray; using Comparator = ComparePairFirst; bool 
sorted = true; diff --git a/dbms/src/AggregateFunctions/QuantileExact.h b/dbms/src/AggregateFunctions/QuantileExact.h index b4398e8bb7f..a5b616669b9 100644 --- a/dbms/src/AggregateFunctions/QuantileExact.h +++ b/dbms/src/AggregateFunctions/QuantileExact.h @@ -27,8 +27,7 @@ struct QuantileExact { /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray); - - using Array = PODArray, bytes_in_arena>>; + using Array = PODArrayWithStackMemory; Array array; void add(const Value & x) diff --git a/dbms/src/AggregateFunctions/QuantileTDigest.h b/dbms/src/AggregateFunctions/QuantileTDigest.h index e9f261d4c21..f7201ef3b0d 100644 --- a/dbms/src/AggregateFunctions/QuantileTDigest.h +++ b/dbms/src/AggregateFunctions/QuantileTDigest.h @@ -86,8 +86,7 @@ class QuantileTDigest /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes. static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray) - sizeof(Count) - sizeof(UInt32); - - using Summary = PODArray, bytes_in_arena>>; + using Summary = PODArrayWithStackMemory; Summary summary; Count count = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSampler.h b/dbms/src/AggregateFunctions/ReservoirSampler.h index ad5bf10f48f..30d72709ac2 100644 --- a/dbms/src/AggregateFunctions/ReservoirSampler.h +++ b/dbms/src/AggregateFunctions/ReservoirSampler.h @@ -194,8 +194,7 @@ private: friend void rs_perf_test(); /// We allocate a little memory on the stack - to avoid allocations when there are many objects with a small number of elements. - static constexpr size_t bytes_on_stack = 64; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArrayWithStackMemory; size_t sample_count; size_t total_values = 0; diff --git a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h index c543e662b2a..4beeecd93bc 100644 --- a/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/dbms/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -164,9 +164,8 @@ public: private: /// We allocate some memory on the stack to avoid allocations when there are many objects with a small number of elements. 
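The hunks in these aggregate-function headers simplify the spelled-out PODArray-with-stack-allocator types, most of them onto the new PODArrayWithStackMemory alias defined in the PODArray.h hunk further down. The underlying technique is a small-buffer optimization: the first N bytes live inline in the object and the heap is touched only on growth. A self-contained sketch of the idea (ClickHouse's actual Allocator / AllocatorWithStackMemory differ in detail):

```cpp
#include <cstddef>
#include <cstdlib>
#include <cstring>

// Minimal small-buffer allocator: serve allocations that fit from an inline
// buffer, fall back to the heap otherwise.
template <size_t stack_bytes>
struct AllocatorWithInlineMemory
{
    alignas(std::max_align_t) char stack[stack_bytes];

    void * alloc(size_t size)
    {
        return size <= stack_bytes ? stack : std::malloc(size);
    }

    void free(void * buf)
    {
        if (buf != stack)
            std::free(buf);
    }

    void * realloc(void * buf, size_t old_size, size_t new_size)
    {
        if (buf == stack)
        {
            if (new_size <= stack_bytes)
                return stack;                    // still fits inline
            void * heap = std::malloc(new_size); // first spill to the heap
            std::memcpy(heap, stack, old_size);
            return heap;
        }
        return std::realloc(buf, new_size);
    }
};
```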
- static constexpr size_t bytes_on_stack = 64; using Element = std::pair; - using Array = DB::PODArray, bytes_on_stack>>; + using Array = DB::PODArray; size_t sample_count; size_t total_values{}; diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 05a9562e549..be9e9ff839a 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -99,6 +99,11 @@ public: return data->getBool(0); } + Float64 getFloat64(size_t) const override + { + return data->getFloat64(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index d36b91b0c40..60a332b22b9 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -57,6 +57,8 @@ public: UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } + Float64 getFloat64(size_t n) const override { return getDictionary().getInt(getIndexes().getFloat64(n)); } + bool getBool(size_t n) const override { return getDictionary().getInt(getIndexes().getBool(n)); } bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); } ColumnPtr cut(size_t start, size_t length) const override { diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 11344a23a1f..322d61081d2 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -64,6 +64,8 @@ public: UInt64 get64(size_t n) const override { return getNestedColumn()->get64(n); } UInt64 getUInt(size_t n) const override { return getNestedColumn()->getUInt(n); } Int64 getInt(size_t n) const override { return getNestedColumn()->getInt(n); } + Float64 getFloat64(size_t n) const override { return getNestedColumn()->getFloat64(n); } + bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; void updateHashWithValue(size_t n, SipHash & hash_func) const override diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 6db110ef02e..a2d6de9df80 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -33,7 +33,7 @@ template StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { auto pos = arena.allocContinue(sizeof(T), begin); - unalignedStore(pos, data[n]); + unalignedStore(pos, data[n]); return StringRef(pos, sizeof(T)); } diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 5881f6bc501..17bf5d0a0e5 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -430,6 +430,8 @@ namespace ErrorCodes extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES = 453; extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; + extern const int UNKNOWN_QUERY_PARAMETER = 456; + extern const int BAD_QUERY_PARAMETER = 457; extern const int CANNOT_UNLINK = 458; extern const int KEEPER_EXCEPTION = 999; diff --git a/dbms/src/Common/MiAllocator.cpp b/dbms/src/Common/MiAllocator.cpp new file mode 100644 index 
00000000000..456609374ee --- /dev/null +++ b/dbms/src/Common/MiAllocator.cpp @@ -0,0 +1,43 @@ +#include + +#if USE_MIMALLOC + +#include "MiAllocator.h" +#include + +namespace DB +{ + +void * MiAllocator::alloc(size_t size, size_t alignment) +{ + if (alignment == 0) + return mi_malloc(size); + else + return mi_malloc_aligned(size, alignment); +} + +void MiAllocator::free(void * buf, size_t) +{ + mi_free(buf); +} + +void * MiAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment) +{ + if (old_ptr == nullptr) + return alloc(new_size, alignment); + + if (new_size == 0) + { + mi_free(old_ptr); + return nullptr; + } + + if (alignment == 0) + return mi_realloc(old_ptr, new_size); + + return mi_realloc_aligned(old_ptr, new_size, alignment); +} + +} + +#endif diff --git a/dbms/src/Common/MiAllocator.h b/dbms/src/Common/MiAllocator.h index 075328e5d94..48cfc6f9ab4 100644 --- a/dbms/src/Common/MiAllocator.h +++ b/dbms/src/Common/MiAllocator.h @@ -6,7 +6,6 @@ #error "do not include this file until USE_MIMALLOC is set to 1" #endif -#include #include namespace DB @@ -19,37 +18,11 @@ namespace DB */ struct MiAllocator { + static void * alloc(size_t size, size_t alignment = 0); - static void * alloc(size_t size, size_t alignment = 0) - { - if (alignment == 0) - return mi_malloc(size); - else - return mi_malloc_aligned(size, alignment); - } - - static void free(void * buf, size_t) - { - mi_free(buf); - } - - static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0) - { - if (old_ptr == nullptr) - return alloc(new_size, alignment); - - if (new_size == 0) - { - mi_free(old_ptr); - return nullptr; - } - - if (alignment == 0) - return mi_realloc(old_ptr, alignment); - - return mi_realloc_aligned(old_ptr, new_size, alignment); - } + static void free(void * buf, size_t); + static void * realloc(void * old_ptr, size_t, size_t new_size, size_t alignment = 0); }; } diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index 0e7d547a7d0..01085a2c5a7 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -45,7 +45,7 @@ inline constexpr size_t integerRoundUp(size_t value, size_t dividend) * Only part of the std::vector interface is supported. * * The default constructor creates an empty object that does not allocate memory. - * Then the memory is allocated at least INITIAL_SIZE bytes. + * Then at least initial_bytes bytes of memory are allocated. * * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector. * * @@ -74,7 +74,7 @@ extern const char EmptyPODArray[EmptyPODArraySize]; /** Base class that depends only on the size of the element, not on the element itself. * You can static_cast to this class if you want to insert some data regardless of the actual type T. */ -template +template class PODArrayBase : private boost::noncopyable, private TAllocator /// empty base optimization { protected: @@ -161,7 +161,8 @@ protected: { // The allocated memory should be a multiple of ELEMENT_SIZE to hold the element; otherwise, // memory issues such as corruption could appear in edge cases.
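The hunk just below swaps the hand-written rounding expression for integerRoundUp (whose signature appears in the hunk header above). Assuming the usual round-up definition, the two forms agree for every positive size, which a quick self-check confirms:

```cpp
#include <cassert>
#include <cstddef>

// Presumed definition (smallest multiple of `dividend` that is >= `value`);
// the body is not shown in this diff, so treat it as an assumption.
constexpr size_t integerRoundUp(size_t value, size_t dividend)
{
    return ((value + dividend - 1) / dividend) * dividend;
}

int main()
{
    // Old spelling: ((value - 1) / dividend + 1) * dividend. Identical for value >= 1.
    for (size_t value = 1; value <= 4096; ++value)
        for (size_t dividend = 1; dividend <= 64; ++dividend)
            assert(integerRoundUp(value, dividend) == ((value - 1) / dividend + 1) * dividend);
}
```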
- realloc(std::max(((INITIAL_SIZE - 1) / ELEMENT_SIZE + 1) * ELEMENT_SIZE, minimum_memory_for_elements(1)), + realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), + minimum_memory_for_elements(1)), std::forward(allocator_params)...); } else @@ -257,11 +258,11 @@ public: } }; -template , size_t pad_right_ = 0, size_t pad_left_ = 0> -class PODArray : public PODArrayBase +template , size_t pad_right_ = 0, size_t pad_left_ = 0> +class PODArray : public PODArrayBase { protected: - using Base = PODArrayBase; + using Base = PODArrayBase; T * t_start() { return reinterpret_cast(this->c_start); } T * t_end() { return reinterpret_cast(this->c_end); } @@ -618,17 +619,23 @@ public: } }; -template -void swap(PODArray & lhs, PODArray & rhs) +template +void swap(PODArray & lhs, PODArray & rhs) { lhs.swap(rhs); } /** For columns. Padding is enough to read and write xmm-register at the address of the last element. */ -template > -using PaddedPODArray = PODArray; +template > +using PaddedPODArray = PODArray; -template -using PODArrayWithStackMemory = PODArray, integerRoundUp(stack_size_in_bytes, sizeof(T))>>; +/** A helper for declaring PODArray that uses inline memory. + * The initial size is set to use all the inline bytes, since using less would + * only add some extra allocation calls. + */ +template +using PODArrayWithStackMemory = PODArray, rounded_bytes>>; } diff --git a/dbms/src/Common/ThreadPool.cpp b/dbms/src/Common/ThreadPool.cpp index 6ed350240c6..91ec29dc188 100644 --- a/dbms/src/Common/ThreadPool.cpp +++ b/dbms/src/Common/ThreadPool.cpp @@ -30,10 +30,18 @@ template template ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::optional wait_microseconds) { - auto on_error = [] + auto on_error = [&] { if constexpr (std::is_same_v) + { + if (first_exception) + { + std::exception_ptr exception; + std::swap(exception, first_exception); + std::rethrow_exception(exception); + } throw DB::Exception("Cannot schedule a task", DB::ErrorCodes::CANNOT_SCHEDULE_TASK); + } else return false; }; diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 1c6c7e9f504..23b1614e704 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -41,9 +41,6 @@ target_link_libraries (compact_array PRIVATE clickhouse_common_io ${Boost_FILESY add_executable (radix_sort radix_sort.cpp) target_link_libraries (radix_sort PRIVATE clickhouse_common_io) -add_executable (shell_command_test shell_command_test.cpp) -target_link_libraries (shell_command_test PRIVATE clickhouse_common_io) - add_executable (arena_with_free_lists arena_with_free_lists.cpp) target_link_libraries (arena_with_free_lists PRIVATE clickhouse_compression clickhouse_common_io) @@ -53,15 +50,6 @@ target_link_libraries (pod_array PRIVATE clickhouse_common_io) add_executable (thread_creation_latency thread_creation_latency.cpp) target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) -add_executable (thread_pool thread_pool.cpp) -target_link_libraries (thread_pool PRIVATE clickhouse_common_io) - -add_executable (thread_pool_2 thread_pool_2.cpp) -target_link_libraries (thread_pool_2 PRIVATE clickhouse_common_io) - -add_executable (thread_pool_3 thread_pool_3.cpp) -target_link_libraries (thread_pool_3 PRIVATE clickhouse_common_io) - add_executable (multi_version multi_version.cpp) target_link_libraries (multi_version PRIVATE clickhouse_common_io) add_check(multi_version) diff --git a/dbms/src/Common/tests/gtest_shell_command.cpp 
b/dbms/src/Common/tests/gtest_shell_command.cpp new file mode 100644 index 00000000000..2378cda2ee7 --- /dev/null +++ b/dbms/src/Common/tests/gtest_shell_command.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +using namespace DB; + + +TEST(ShellCommand, Execute) +{ + auto command = ShellCommand::execute("echo 'Hello, world!'"); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteDirect) +{ + auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, ExecuteWithInput) +{ + auto command = ShellCommand::execute("cat"); + + String in_str = "Hello, world!\n"; + ReadBufferFromString in(in_str); + copyData(in, command->in); + command->in.close(); + + std::string res; + readStringUntilEOF(res, command->out); + command->wait(); + + EXPECT_EQ(res, "Hello, world!\n"); +} + +TEST(ShellCommand, AutoWait) +{ + // hunting: + for (int i = 0; i < 1000; ++i) + { + auto command = ShellCommand::execute("echo " + std::to_string(i)); + //command->wait(); // now automatic + } + + // std::cerr << "inspect me: ps auxwwf" << "\n"; + // std::this_thread::sleep_for(std::chrono::seconds(100)); +} diff --git a/dbms/src/Common/tests/thread_pool.cpp b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp similarity index 73% rename from dbms/src/Common/tests/thread_pool.cpp rename to dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp index 23dba2aadec..1e38e418a22 100644 --- a/dbms/src/Common/tests/thread_pool.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_concurrent_wait.cpp @@ -1,11 +1,18 @@ #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + /** Reproduces a bug in ThreadPool. * It gets stuck if we call 'wait' many times from many other threads simultaneously. */ -int main(int, char **) +TEST(ThreadPool, ConcurrentWait) { auto worker = [] { @@ -29,6 +36,4 @@ int main(int, char **) waiting_pool.schedule([&pool]{ pool.wait(); }); waiting_pool.wait(); - - return 0; } diff --git a/dbms/src/Common/tests/gtest_thread_pool_limit.cpp b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp new file mode 100644 index 00000000000..2bd38f34d10 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_limit.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + +/// Test for thread self-removal when the number of free threads in the pool is too large. +/// Just checks that nothing weird happens.
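The test migrations in this directory all follow one recipe: each standalone main() binary under dbms/src/Common/tests becomes a set of TEST() cases, stderr inspection turns into EXPECT_* assertions, and the per-test add_executable entries disappear from CMakeLists.txt. The recipe in miniature (the function under test is hypothetical; gtest_main supplies the entry point):

```cpp
#include <gtest/gtest.h>

static int answer() { return 42; }   // hypothetical function under test

TEST(Example, Answer)
{
    // Replaces `std::cerr << answer()` plus eyeballing the output.
    EXPECT_EQ(answer(), 42);
}
```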
+ +template +int test() +{ + Pool pool(10, 2, 10); + + std::atomic counter{0}; + for (size_t i = 0; i < 10; ++i) + pool.schedule([&]{ ++counter; }); + pool.wait(); + + return counter; +} + +TEST(ThreadPool, ThreadRemoval) +{ + EXPECT_EQ(test(), 10); + EXPECT_EQ(test(), 10); +} diff --git a/dbms/src/Common/tests/thread_pool_2.cpp b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp similarity index 50% rename from dbms/src/Common/tests/thread_pool_2.cpp rename to dbms/src/Common/tests/gtest_thread_pool_loop.cpp index 029c3695e36..80b7b94d988 100644 --- a/dbms/src/Common/tests/thread_pool_2.cpp +++ b/dbms/src/Common/tests/gtest_thread_pool_loop.cpp @@ -2,10 +2,17 @@ #include #include +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include -int main(int, char **) + +TEST(ThreadPool, Loop) { - std::atomic res{0}; + std::atomic res{0}; for (size_t i = 0; i < 1000; ++i) { @@ -16,6 +23,5 @@ int main(int, char **) pool.wait(); } - std::cerr << res << "\n"; - return 0; + EXPECT_EQ(res, 16000); } diff --git a/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp new file mode 100644 index 00000000000..001d9c30b27 --- /dev/null +++ b/dbms/src/Common/tests/gtest_thread_pool_schedule_exception.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +bool check() +{ + ThreadPool pool(10); + + pool.schedule([]{ throw std::runtime_error("Hello, world!"); }); + + try + { + for (size_t i = 0; i < 100; ++i) + pool.schedule([]{}); /// An exception will be rethrown from this method. + } + catch (const std::runtime_error &) + { + return true; + } + + pool.wait(); + + return false; +} + + +TEST(ThreadPool, ExceptionFromSchedule) +{ + EXPECT_TRUE(check()); +} diff --git a/dbms/src/Common/tests/shell_command_test.cpp b/dbms/src/Common/tests/shell_command_test.cpp deleted file mode 100644 index 7de6c18bfdf..00000000000 --- a/dbms/src/Common/tests/shell_command_test.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -using namespace DB; - - -int main(int, char **) -try -{ - { - auto command = ShellCommand::execute("echo 'Hello, world!'"); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::executeDirect("/bin/echo", {"Hello, world!"}); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - { - auto command = ShellCommand::execute("cat"); - - String in_str = "Hello, world!\n"; - ReadBufferFromString in(in_str); - copyData(in, command->in); - command->in.close(); - - WriteBufferFromFileDescriptor out(STDOUT_FILENO); - copyData(command->out, out); - - command->wait(); - } - - // hunting: - for (int i = 0; i < 1000; ++i) - { - auto command = ShellCommand::execute("echo " + std::to_string(i)); - //command->wait(); // now automatic - } - - // std::cerr << "inspect me: ps auxwwf" << "\n"; - // std::this_thread::sleep_for(std::chrono::seconds(100)); -} -catch (...) 
-{ - std::cerr << getCurrentExceptionMessage(false) << "\n"; - return 1; -} diff --git a/dbms/src/Common/tests/thread_pool_3.cpp b/dbms/src/Common/tests/thread_pool_3.cpp deleted file mode 100644 index 924895de308..00000000000 --- a/dbms/src/Common/tests/thread_pool_3.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -/// Test for thread self-removal when number of free threads in pool is too large. -/// Just checks that nothing weird happens. - -template -void test() -{ - Pool pool(10, 2, 10); - - std::mutex mutex; - for (size_t i = 0; i < 10; ++i) - pool.schedule([&]{ std::lock_guard lock(mutex); std::cerr << '.'; }); - pool.wait(); -} - -int main(int, char **) -{ - test(); - std::cerr << '\n'; - test(); - std::cerr << '\n'; - - return 0; -} diff --git a/dbms/src/Compression/CompressionCodecDelta.cpp b/dbms/src/Compression/CompressionCodecDelta.cpp index f5a5db04927..9f2397f8e59 100644 --- a/dbms/src/Compression/CompressionCodecDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDelta.cpp @@ -67,7 +67,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) while (source < source_end) { accumulator += unalignedLoad(source); - unalignedStore(dest, accumulator); + unalignedStore(dest, accumulator); source += sizeof(T); dest += sizeof(T); diff --git a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp index b40b2abccfa..8f306f3f06a 100644 --- a/dbms/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/dbms/src/Compression/CompressionCodecDoubleDelta.cpp @@ -90,7 +90,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const char * source_end = source + source_size; const UInt32 items_count = source_size / sizeof(T); - unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -99,7 +99,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -109,7 +109,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) { const T curr_value = unalignedLoad(source); prev_delta = static_cast(curr_value - prev_value); - unalignedStore(dest, prev_delta); + unalignedStore(dest, prev_delta); source += sizeof(curr_value); dest += sizeof(prev_delta); @@ -164,7 +164,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -174,7 +174,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) { prev_delta = unalignedLoad(source); prev_value = static_cast(prev_value + prev_delta); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_delta); dest += sizeof(prev_value); @@ -209,7 +209,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) // else if first bit is zero, no need to read more data. 
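The recurring change from unalignedStore(dest, x) to unalignedStore<T>(dest, x) in these codec files pins the store width to T rather than to whatever type x happens to have. A minimal illustration of the hazard being closed, using a simplified stand-in for the real helper in Common/unaligned.h:

```cpp
#include <cstdint>
#include <cstring>

template <typename T>
inline void unalignedStore(void * address, const T & src)
{
    std::memcpy(address, &src, sizeof(src));   // writes sizeof(T) bytes
}

int main()
{
    char buffer[sizeof(uint32_t)];
    uint64_t accumulator = 0x1122334455667788ULL;

    // unalignedStore(buffer, accumulator);        // deduces T = uint64_t and
    //                                             // writes 8 bytes into 4
    unalignedStore<uint32_t>(buffer, accumulator); // truncation is explicit and
                                                   // exactly 4 bytes are written
    (void) buffer;
}
```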
const T curr_value = static_cast(prev_value + prev_delta + double_delta); - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_delta = curr_value - prev_value; diff --git a/dbms/src/Compression/CompressionCodecGorilla.cpp b/dbms/src/Compression/CompressionCodecGorilla.cpp index f9c6b52756c..79cc6d27e81 100644 --- a/dbms/src/Compression/CompressionCodecGorilla.cpp +++ b/dbms/src/Compression/CompressionCodecGorilla.cpp @@ -94,7 +94,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) const UInt32 items_count = source_size / sizeof(T); - unalignedStore(dest, items_count); + unalignedStore(dest, items_count); dest += sizeof(items_count); T prev_value{}; @@ -104,7 +104,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -166,7 +166,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) if (source < source_end) { prev_value = unalignedLoad(source); - unalignedStore(dest, prev_value); + unalignedStore(dest, prev_value); source += sizeof(prev_value); dest += sizeof(prev_value); @@ -210,7 +210,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) } // else: 0b0 prefix - use prev_value - unalignedStore(dest, curr_value); + unalignedStore(dest, curr_value); dest += sizeof(curr_value); prev_xored_info = curr_xored_info; diff --git a/dbms/src/Compression/CompressionCodecT64.cpp b/dbms/src/Compression/CompressionCodecT64.cpp index cd369fc9c4e..9919f5322c5 100644 --- a/dbms/src/Compression/CompressionCodecT64.cpp +++ b/dbms/src/Compression/CompressionCodecT64.cpp @@ -390,7 +390,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco { _T min_value = min; for (UInt32 i = 0; i < num_elements; ++i, dst += sizeof(_T)) - unalignedStore(dst, min_value); + unalignedStore<_T>(dst, min_value); return; } diff --git a/dbms/src/Compression/LZ4_decompress_faster.cpp b/dbms/src/Compression/LZ4_decompress_faster.cpp index 387650d3dcc..0d65a06b098 100644 --- a/dbms/src/Compression/LZ4_decompress_faster.cpp +++ b/dbms/src/Compression/LZ4_decompress_faster.cpp @@ -200,7 +200,7 @@ inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t o 0, 1, 2, 3, 4, 5, 6, 0, }; - unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); + unalignedStore(op, vtbl1_u8(unalignedLoad(match), unalignedLoad(masks + 8 * offset))); match += masks[offset]; } @@ -328,10 +328,10 @@ inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, }; - unalignedStore(op, + unalignedStore(op, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset))); - unalignedStore(op + 8, + unalignedStore(op + 8, vtbl2_u8(unalignedLoad(match), unalignedLoad(masks + 16 * offset + 8))); match += masks[offset]; diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 27b2cb81b09..e156d7f69f6 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -336,6 +336,7 @@ MutableColumns Block::mutateColumns() void Block::setColumns(MutableColumns && columns) { + /// TODO: assert if |columns| doesn't match |data|! 
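A minimal version of the guard this TODO asks for, sketched rather than taken from the PR (Exception and ErrorCodes::LOGICAL_ERROR are used as elsewhere in dbms):

```cpp
void Block::setColumns(MutableColumns && columns)
{
    /// Hypothetical check: the incoming column set must match the block
    /// structure one-to-one.
    if (columns.size() != data.size())
        throw Exception("Cannot set columns in Block: passed " + std::to_string(columns.size())
            + " columns, but the block has " + std::to_string(data.size()),
            ErrorCodes::LOGICAL_ERROR);

    size_t num_columns = data.size();
    for (size_t i = 0; i < num_columns; ++i)
        data[i].column = std::move(columns[i]);
}
```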
size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = std::move(columns[i]); @@ -344,6 +345,7 @@ void Block::setColumns(MutableColumns && columns) void Block::setColumns(const Columns & columns) { + /// TODO: assert if |columns| doesn't match |data|! size_t num_columns = data.size(); for (size_t i = 0; i < num_columns; ++i) data[i].column = columns[i]; diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index b8db4d64843..bb39c495087 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -88,7 +88,7 @@ #define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)" #if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__) -// #error PLATFORM_NOT_SUPPORTED + #error PLATFORM_NOT_SUPPORTED #endif /// Check for presence of address sanitizer @@ -114,10 +114,12 @@ #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) #define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) + #define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. #define NO_SANITIZE_UNDEFINED #define NO_SANITIZE_ADDRESS + #define NO_SANITIZE_THREAD #endif #if defined __GNUC__ && !defined __clang__ diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index 3541eaf296a..75c7cbaff66 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include namespace DB diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp index 4c78aeb7ce5..49283278bf4 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -60,7 +60,7 @@ ConvertingBlockInputStream::ConvertingBlockInputStream( if (input_header.has(res_elem.name)) conversion[result_col_num] = input_header.getPositionByName(res_elem.name); else - throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + throw Exception("Cannot find column " + backQuote(res_elem.name) + " in source stream", ErrorCodes::THERE_IS_NO_COLUMN); break; } diff --git a/dbms/src/DataStreams/OneBlockInputStream.h b/dbms/src/DataStreams/OneBlockInputStream.h index 3f1da34fcd8..168053b4fb3 100644 --- a/dbms/src/DataStreams/OneBlockInputStream.h +++ b/dbms/src/DataStreams/OneBlockInputStream.h @@ -12,7 +12,7 @@ namespace DB class OneBlockInputStream : public IBlockInputStream { public: - OneBlockInputStream(const Block & block_) : block(block_) {} + explicit OneBlockInputStream(const Block & block_) : block(block_) {} String getName() const override { return "One"; } diff --git a/dbms/src/DataStreams/ParallelInputsProcessor.h b/dbms/src/DataStreams/ParallelInputsProcessor.h index 3f85238814a..505bfac567c 100644 --- a/dbms/src/DataStreams/ParallelInputsProcessor.h +++ b/dbms/src/DataStreams/ParallelInputsProcessor.h @@ -95,12 +95,11 @@ public: { active_threads = max_threads; threads.reserve(max_threads); - auto thread_group = CurrentThread::getGroup(); try { for (size_t i = 0; i < max_threads; ++i) - threads.emplace_back([=] () { thread(thread_group, i); }); + threads.emplace_back(&ParallelInputsProcessor::thread, this, CurrentThread::getGroup(), i); } catch (...) 
{ diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 195c5edcb07..304d7aa989c 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -63,6 +63,17 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( } +Block PushingToViewsBlockOutputStream::getHeader() const +{ + /// If we don't write directly to the destination + /// then expect that we're inserting with precalculated virtual columns + if (output) + return storage->getSampleBlock(); + else + return storage->getSampleBlockWithVirtuals(); +} + + void PushingToViewsBlockOutputStream::write(const Block & block) { /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match. @@ -73,6 +84,8 @@ void PushingToViewsBlockOutputStream::write(const Block & block) Nested::validateArraySizes(block); if (output) + /// TODO: to support virtual and alias columns inside MVs, we should return here the inserted block extended + /// with additional columns directly from storage and pass it to MVs instead of raw block. output->write(block); /// Don't process materialized views if this block is duplicate diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 3381a828ff0..34b8cb43042 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -22,7 +22,7 @@ public: const String & database, const String & table, const StoragePtr & storage_, const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false); - Block getHeader() const override { return storage->getSampleBlock(); } + Block getHeader() const override; void write(const Block & block) override; void flush() override; diff --git a/dbms/src/DataStreams/TTLBlockInputStream.cpp b/dbms/src/DataStreams/TTLBlockInputStream.cpp index 482a3ff4814..1e765f8bb3c 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.cpp +++ b/dbms/src/DataStreams/TTLBlockInputStream.cpp @@ -26,6 +26,7 @@ TTLBlockInputStream::TTLBlockInputStream( , date_lut(DateLUT::instance()) { children.push_back(input_); + header = children.at(0)->getHeader(); const auto & column_defaults = storage.getColumns().getDefaults(); ASTPtr default_expr_list = std::make_shared(); @@ -58,11 +59,6 @@ TTLBlockInputStream::TTLBlockInputStream( } -Block TTLBlockInputStream::getHeader() const -{ - return children.at(0)->getHeader(); -} - Block TTLBlockInputStream::readImpl() { Block block = children.at(0)->read(); @@ -108,11 +104,13 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) const auto & current = block.getByName(storage.ttl_table_entry.result_column); const IColumn * ttl_column = current.column.get(); + const auto & column_names = header.getNames(); MutableColumns result_columns; - result_columns.reserve(getHeader().columns()); - for (const auto & name : storage.getColumns().getNamesOfPhysical()) + result_columns.reserve(column_names.size()); + + for (auto it = column_names.begin(); it != column_names.end(); ++it) { - auto & column_with_type = block.getByName(name); + auto & column_with_type = block.getByName(*it); const IColumn * values_column = column_with_type.column.get(); MutableColumnPtr result_column = values_column->cloneEmpty(); result_column->reserve(block.rows()); @@ -125,13 +123,13 @@ void 
TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) new_ttl_infos.table_ttl.update(cur_ttl); result_column->insertFrom(*values_column, i); } - else + else if (it == column_names.begin()) ++rows_removed; } result_columns.emplace_back(std::move(result_column)); } - block = getHeader().cloneWithColumns(std::move(result_columns)); + block = header.cloneWithColumns(std::move(result_columns)); } void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) diff --git a/dbms/src/DataStreams/TTLBlockInputStream.h b/dbms/src/DataStreams/TTLBlockInputStream.h index a95cd627bc9..de0d4f9156b 100644 --- a/dbms/src/DataStreams/TTLBlockInputStream.h +++ b/dbms/src/DataStreams/TTLBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { return "TTLBlockInputStream"; } - Block getHeader() const override; + Block getHeader() const override { return header; } protected: Block readImpl() override; @@ -47,6 +47,8 @@ private: std::unordered_map defaults_result_column; ExpressionActionsPtr defaults_expression; + + Block header; private: /// Removes values with expired ttl and computes new min_ttl and empty_columns for part void removeValuesWithExpiredColumnTTL(Block & block); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index a5dd5f8be62..a2c00e18acb 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -216,6 +217,12 @@ void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuff } +void DataTypeAggregateFunction::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception("AggregateFunction data type cannot be read from text", ErrorCodes::NOT_IMPLEMENTED); +} + + void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeJSONString(serializeToString(function, column, row_num), ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.h b/dbms/src/DataTypes/DataTypeAggregateFunction.h index 14407c8a90c..9ae7c67a803 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.h +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.h @@ -52,6 +52,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeCustom.h b/dbms/src/DataTypes/DataTypeCustom.h index 0706803048d..c4f846d0259 100644 --- a/dbms/src/DataTypes/DataTypeCustom.h +++ b/dbms/src/DataTypes/DataTypeCustom.h @@ -33,6 
+33,10 @@ public: */ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text serialization with escaping but without quoting. */ virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp index 5f1e2ae5665..18b8798fe77 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp @@ -32,6 +32,13 @@ DataTypeCustomSimpleTextSerialization::~DataTypeCustomSimpleTextSerialization() { } +void DataTypeCustomSimpleTextSerialization::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + String str; + readString(str, istr); + deserializeFromString(*this, column, str, settings); +} + void DataTypeCustomSimpleTextSerialization::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeEscapedString(serializeToString(*this, column, row_num, settings), ostr); diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h index ba483b4ff5c..e1c08d28738 100644 --- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h +++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h @@ -21,6 +21,10 @@ public: virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + /** Text deserialization without quoting or escaping. + */ + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + /** Text serialization with escaping but without quoting. 
*/ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeDate.cpp b/dbms/src/DataTypes/DataTypeDate.cpp index 73edfd012fa..0b1f502b694 100644 --- a/dbms/src/DataTypes/DataTypeDate.cpp +++ b/dbms/src/DataTypes/DataTypeDate.cpp @@ -16,6 +16,11 @@ void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBu writeDateText(DayNum(static_cast(column).getData()[row_num]), ostr); } +void DataTypeDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; diff --git a/dbms/src/DataTypes/DataTypeDate.h b/dbms/src/DataTypes/DataTypeDate.h index a441d638cc4..7bd4c0d6b02 100644 --- a/dbms/src/DataTypes/DataTypeDate.h +++ b/dbms/src/DataTypes/DataTypeDate.h @@ -13,6 +13,7 @@ public: const char * getFamilyName() const override { return "Date"; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..a6b8f0da92a 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -62,6 +62,11 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } +void DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x; diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..6a951e0e288 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -38,6 +38,7 @@ public: TypeIndex getTypeId() const override { return TypeIndex::DateTime; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 24f760a1800..a784f1502e4 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -166,6 +166,14 @@ void DataTypeEnum::deserializeTextQuoted(IColumn & column, ReadBuffer & is 
static_cast(column).getData().push_back(getValue(StringRef(field_name))); } +template +void DataTypeEnum::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + std::string field_name; + readString(field_name, istr); + static_cast(column).getData().push_back(getValue(StringRef(field_name))); +} + template void DataTypeEnum::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index 19d4ad691dc..b99e2383860 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -96,6 +96,8 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp index d1a007e16d2..34970fdaae9 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.cpp +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -176,6 +176,12 @@ void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & i } +void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const char * pos = reinterpret_cast(&static_cast(column).getChars()[n * row_num]); diff --git a/dbms/src/DataTypes/DataTypeFixedString.h b/dbms/src/DataTypes/DataTypeFixedString.h index 3019b6d225d..1a8a33d95c6 100644 --- a/dbms/src/DataTypes/DataTypeFixedString.h +++ b/dbms/src/DataTypes/DataTypeFixedString.h @@ -50,6 +50,8 @@ public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 1742c1cb2e9..8e6e12fadba 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -81,6 +81,11 @@ public: deserializeImpl(column, &IDataType::deserializeAsTextQuoted, istr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + deserializeImpl(column, &IDataType::deserializeAsTextEscaped, istr, settings); + } + 
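All of these deserializeWholeText overrides serve one new consumer: query-parameter substitution parses each raw param_ string as a single value of the declared type. The visitor itself is outside this section, so the following is a hedged reconstruction of how it plausibly uses the new API; DataTypeFactory, ReadBufferFromString, deserializeAsWholeText and the two new error codes are real names from this codebase, while the function shape and the ASTQueryParameter fields are assumptions:

```cpp
void replaceParameter(ASTQueryParameter & ast_param, const NameToNameMap & query_parameters)
{
    auto it = query_parameters.find(ast_param.name);
    if (it == query_parameters.end())
        throw Exception("Substitution " + ast_param.name + " is not set",
                        ErrorCodes::UNKNOWN_QUERY_PARAMETER);

    const auto data_type = DataTypeFactory::instance().get(ast_param.type);
    auto temp_column = data_type->createColumn();
    ReadBufferFromString read_buffer{it->second};
    FormatSettings format_settings;

    /// The whole string must parse as exactly one value of the declared type.
    data_type->deserializeAsWholeText(*temp_column, read_buffer, format_settings);
    if (!read_buffer.eof())
        throw Exception("Value " + it->second + " cannot be parsed as " + ast_param.type,
                        ErrorCodes::BAD_QUERY_PARAMETER);

    /// ...the parsed value then replaces the ASTQueryParameter node as an ASTLiteral.
}
```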
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override { serializeImpl(column, row_num, &IDataType::serializeAsTextCSV, ostr, settings); diff --git a/dbms/src/DataTypes/DataTypeNullable.cpp b/dbms/src/DataTypes/DataTypeNullable.cpp index 0bfe8a157d6..c56d8616be2 100644 --- a/dbms/src/DataTypes/DataTypeNullable.cpp +++ b/dbms/src/DataTypes/DataTypeNullable.cpp @@ -251,6 +251,15 @@ void DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); }); } + +void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + safeDeserialize(column, + [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); }, + [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); }); +} + + void DataTypeNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const ColumnNullable & col = static_cast(column); diff --git a/dbms/src/DataTypes/DataTypeNullable.h b/dbms/src/DataTypes/DataTypeNullable.h index 1081f84dd11..2b098ea0476 100644 --- a/dbms/src/DataTypes/DataTypeNullable.h +++ b/dbms/src/DataTypes/DataTypeNullable.h @@ -53,6 +53,7 @@ public: void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index d3334ef93bf..5d104c76fef 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -244,6 +244,12 @@ static inline void read(IColumn & column, Reader && reader) } +void DataTypeString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + read(column, [&](ColumnString::Chars & data) { readStringInto(data, istr); }); +} + + void DataTypeString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); diff --git a/dbms/src/DataTypes/DataTypeString.h b/dbms/src/DataTypes/DataTypeString.h index 0a3d2277e79..3cf85f69a1f 100644 --- a/dbms/src/DataTypes/DataTypeString.h +++ b/dbms/src/DataTypes/DataTypeString.h @@ -30,6 +30,7 @@ public: void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git 
a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h index 8f897153fd0..6f6120deb4f 100644 --- a/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h +++ b/dbms/src/DataTypes/DataTypeWithSimpleSerialization.h @@ -32,6 +32,11 @@ protected: serializeText(column, row_num, ostr, settings); } + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override + { + deserializeText(column, istr, settings); + } + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override { deserializeText(column, istr, settings); diff --git a/dbms/src/DataTypes/IDataType.cpp b/dbms/src/DataTypes/IDataType.cpp index 09c080f56cc..39d269d8613 100644 --- a/dbms/src/DataTypes/IDataType.cpp +++ b/dbms/src/DataTypes/IDataType.cpp @@ -142,121 +142,89 @@ void IDataType::insertDefaultInto(IColumn & column) const void IDataType::serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextEscaped(column, row_num, ostr, settings); - } else - { serializeTextEscaped(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextEscaped(column, istr, settings); - } else - { deserializeTextEscaped(column, istr, settings); - } } void IDataType::serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextQuoted(column, row_num, ostr, settings); - } else - { serializeTextQuoted(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextQuoted(column, istr, settings); - } else - { deserializeTextQuoted(column, istr, settings); - } } void IDataType::serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextCSV(column, row_num, ostr, settings); - } else - { serializeTextCSV(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextCSV(column, istr, settings); - } else - { deserializeTextCSV(column, istr, settings); - } } void IDataType::serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeText(column, row_num, ostr, settings); - } else - { serializeText(column, row_num, ostr, settings); - } +} + +void IDataType::deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + if (custom_text_serialization) + custom_text_serialization->deserializeWholeText(column, istr, settings); + else + deserializeWholeText(column, istr, settings); } void IDataType::serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if 
(custom_text_serialization) - { custom_text_serialization->serializeTextJSON(column, row_num, ostr, settings); - } else - { serializeTextJSON(column, row_num, ostr, settings); - } } void IDataType::deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->deserializeTextJSON(column, istr, settings); - } else - { deserializeTextJSON(column, istr, settings); - } } void IDataType::serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { if (custom_text_serialization) - { custom_text_serialization->serializeTextXML(column, row_num, ostr, settings); - } else - { serializeTextXML(column, row_num, ostr, settings); - } } void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 1a6c87b64ce..f4c22ff9ac8 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -222,76 +222,60 @@ public: /// If method will throw an exception, then column will be in same state as before call to method. virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0; - /** Text serialization with escaping but without quoting. - */ - virtual void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - - virtual void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - - /** Text serialization as a literal that may be inserted into a query. - */ - virtual void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - - virtual void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - - /** Text serialization for the CSV format. - */ - virtual void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - - /** Text serialization for displaying on a terminal or saving into a text file, and the like. - * Without escaping or quoting. - */ - virtual void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - - /** Text serialization intended for using in JSON format. - */ - virtual void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - - /** Text serialization for putting into the XML format. - */ - virtual void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; - /** Serialize to a protobuf. */ virtual void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const = 0; virtual void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const = 0; -protected: - virtual String doGetName() const; - /** Text serialization with escaping but without quoting. 
*/ -public: // used somewhere in arcadia - virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + void serializeAsTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; -protected: - virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + void deserializeAsTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization as a literal that may be inserted into a query. */ - virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + void serializeAsTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; - virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + void deserializeAsTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for the CSV format. */ - virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + void serializeAsTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for displaying on a terminal or saving into a text file, and the like. * Without escaping or quoting. */ - virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + void serializeAsText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + + /** Text deserialization for the case when the buffer contains only one value, without any escaping or delimiters. + */ + void deserializeAsWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization intended for using in JSON format. - * force_quoting_64bit_integers parameter forces to brace UInt64 and Int64 types into quotes. */ - virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; - virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + void serializeAsTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const; + void deserializeAsTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; /** Text serialization for putting into the XML format. */ + void serializeAsTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const; + +protected: + virtual String doGetName() const; + + /// Default implementations of text serialization in case 'custom_text_serialization' is not set.
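The protected pure-virtual list continues just below; in miniature, the split this hunk introduces looks as follows (illustrative names, not the real class, and only one method shown):

#include <memory>

struct CustomTextSerialization
{
    virtual ~CustomTextSerialization() = default;
    virtual void deserializeWholeText() const = 0;
};

class TypeBase
{
public:
    /// Public, non-virtual entry point: what callers use.
    void deserializeAsWholeText() const
    {
        if (custom)
            custom->deserializeWholeText();   /// per-type customization wins
        else
            deserializeWholeText();           /// default implementation
    }

protected:
    /// What concrete types override.
    virtual void deserializeWholeText() const = 0;

private:
    std::unique_ptr<CustomTextSerialization> custom;   /// usually empty
};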
+ + virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; + virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); @@ -471,7 +455,6 @@ private: public: const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } - const IDataTypeCustomTextSerialization * getCustomTextSerialization() const { return custom_text_serialization.get(); } }; diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 01aa397148f..b11f4de88b8 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -86,7 +86,7 @@ DatabaseIteratorPtr DatabaseDictionary::getIterator(const Context & context, con bool DatabaseDictionary::empty(const Context & context) const { - return context.getExternalDictionaries().getNumberOfNames() == 0; + return !context.getExternalDictionaries().hasCurrentlyLoadedObjects(); } StoragePtr DatabaseDictionary::detachTable(const String & /*table_name*/) diff --git a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index c710b17ee9e..9177a70bb18 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -65,11 +65,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); factory.registerInputFormat("RowBinaryWithNamesAndTypes", []( @@ -78,11 +79,12 @@ void registerInputFormatRowBinary(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index b67ce9b28cd..2335363db70 100644 --- 
a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -28,9 +28,15 @@ BlockInputStreamFromRowInputStream::BlockInputStreamFromRowInputStream( const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings) - : row_input(row_input_), sample(sample_), max_block_size(max_block_size_), rows_portion_size(rows_portion_size_), - allow_errors_num(settings.input_allow_errors_num), allow_errors_ratio(settings.input_allow_errors_ratio) + : row_input(row_input_) + , sample(sample_) + , max_block_size(max_block_size_) + , rows_portion_size(rows_portion_size_) + , read_virtual_columns_callback(callback) + , allow_errors_num(settings.input_allow_errors_num) + , allow_errors_ratio(settings.input_allow_errors_ratio) { } @@ -73,6 +79,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() RowReadExtension info; if (!row_input->read(columns, info)) break; + if (read_virtual_columns_callback) + read_virtual_columns_callback(); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 2f91aa2ecb2..2338af3bf38 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -24,6 +25,7 @@ public: const Block & sample_, UInt64 max_block_size_, UInt64 rows_portion_size_, + FormatFactory::ReadCallback callback, const FormatSettings & settings); void readPrefix() override { row_input->readPrefix(); } @@ -45,6 +47,10 @@ private: Block sample; UInt64 max_block_size; UInt64 rows_portion_size; + + /// Callback used to set up virtual columns after reading each row.
+ FormatFactory::ReadCallback read_virtual_columns_callback; + BlockMissingValues block_missing_values; UInt64 allow_errors_num; diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index 9108c7df3d1..07cfd4826df 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -531,11 +531,12 @@ void registerInputFormatCSV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, with_names, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 5424e233618..96c3c5fded3 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -308,6 +308,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( @@ -315,6 +316,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) sample, max_block_size, rows_portion_size, + callback, settings); }); } diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp index d1b3bac5e3d..3ae0bf3b6de 100644 --- a/dbms/src/Formats/FormatFactory.cpp +++ b/dbms/src/Formats/FormatFactory.cpp @@ -27,7 +27,14 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name) } -BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf, const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size) const +BlockInputStreamPtr FormatFactory::getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size, + ReadCallback callback) const { const auto & input_getter = getCreators(name).first; if (!input_getter) @@ -48,7 +55,8 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; - return input_getter(buf, sample, context, max_block_size, rows_portion_size, format_settings); + return input_getter( + buf, sample, context, max_block_size, rows_portion_size, callback ? callback : ReadCallback(), format_settings); } diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h index 843d866301d..9c8b87e7d8b 100644 --- a/dbms/src/Formats/FormatFactory.h +++ b/dbms/src/Formats/FormatFactory.h @@ -24,6 +24,11 @@ class WriteBuffer; */ class FormatFactory final : public ext::singleton { +public: + /// This callback allows performing some additional actions after reading a single row. + /// Its initial purpose was to extract payload for virtual columns from Kafka Consumer ReadBuffer.
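A sketch of how a caller threads a callback through (the ReadCallback alias is declared on the next line; read_buffer, sample_block, context and max_block_size stand in for whatever the caller already has):

/// Hypothetical caller: count rows as they are parsed. The callback fires
/// once per row, right after IRowInputStream::read() succeeds.
size_t rows_seen = 0;
auto stream = FormatFactory::instance().getInput(
    "CSV", read_buffer, sample_block, context,
    max_block_size, /* rows_portion_size = */ 0,
    [&rows_seen] { ++rows_seen; });

Per the comment above, the intended Kafka use is a lambda that pulls the virtual-column payload out of the consumer's ReadBuffer at exactly this per-row point.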
+ using ReadCallback = std::function; + private: using InputCreator = std::function; using OutputCreator = std::function; public: - BlockInputStreamPtr getInput(const String & name, ReadBuffer & buf, - const Block & sample, const Context & context, UInt64 max_block_size, UInt64 rows_portion_size = 0) const; + BlockInputStreamPtr getInput( + const String & name, + ReadBuffer & buf, + const Block & sample, + const Context & context, + UInt64 max_block_size, + UInt64 rows_portion_size = 0, + ReadCallback callback = {}) const; BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const; diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index 5055d6c0c7d..72acf722ae7 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -260,11 +260,12 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp index 88e727fdd3f..f324879608b 100644 --- a/dbms/src/Formats/NativeFormat.cpp +++ b/dbms/src/Formats/NativeFormat.cpp @@ -14,6 +14,7 @@ void registerInputFormatNative(FormatFactory & factory) const Context &, UInt64 /* max_block_size */, UInt64 /* min_read_rows */, + FormatFactory::ReadCallback /* callback */, const FormatSettings &) { return std::make_shared(buf, sample, 0); diff --git a/dbms/src/Formats/ParquetBlockInputStream.cpp b/dbms/src/Formats/ParquetBlockInputStream.cpp index 5e7dc876244..deba953bab4 100644 --- a/dbms/src/Formats/ParquetBlockInputStream.cpp +++ b/dbms/src/Formats/ParquetBlockInputStream.cpp @@ -476,6 +476,7 @@ void registerInputFormatParquet(FormatFactory & factory) const Context & context, UInt64 /* max_block_size */, UInt64 /* rows_portion_size */, + FormatFactory::ReadCallback /* callback */, const FormatSettings & /* settings */) { return std::make_shared(buf, sample, context); }); } diff --git a/dbms/src/Formats/ProtobufRowInputStream.cpp b/dbms/src/Formats/ProtobufRowInputStream.cpp index 45ea6b8dca7..1c4193b9f1a 100644 --- a/dbms/src/Formats/ProtobufRowInputStream.cpp +++ b/dbms/src/Formats/ProtobufRowInputStream.cpp @@ -74,11 +74,12 @@ void registerInputFormatProtobuf(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, FormatSchemaInfo(context, "Protobuf")), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index ac89d5ec1c5..d86ee22bc4b 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -199,11 +199,12 @@ void registerInputFormatTSKV(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, settings), - sample, 
max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index b9428e81c62..0c16c14e306 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -457,11 +457,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, false, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -473,11 +474,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, false, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } @@ -489,11 +491,12 @@ void registerInputFormatTabSeparated(FormatFactory & factory) const Context &, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, true, true, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } } diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index b2d972d678b..33799a95549 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -156,11 +156,12 @@ void registerInputFormatValues(FormatFactory & factory) const Context & context, UInt64 max_block_size, UInt64 rows_portion_size, + FormatFactory::ReadCallback callback, const FormatSettings & settings) { return std::make_shared( std::make_shared(buf, sample, context, settings), - sample, max_block_size, rows_portion_size, settings); + sample, max_block_size, rows_portion_size, callback, settings); }); } diff --git a/dbms/src/Formats/tests/block_row_transforms.cpp b/dbms/src/Formats/tests/block_row_transforms.cpp index c880ff7fc39..9d38a37f833 100644 --- a/dbms/src/Formats/tests/block_row_transforms.cpp +++ b/dbms/src/Formats/tests/block_row_transforms.cpp @@ -45,7 +45,7 @@ try FormatSettings format_settings; RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample, false, false, format_settings); BlockOutputStreamFromRowOutputStream block_output(row_output, sample); diff --git a/dbms/src/Formats/tests/tab_separated_streams.cpp b/dbms/src/Formats/tests/tab_separated_streams.cpp index 50b9350d4c5..11895699c3b 100644 --- a/dbms/src/Formats/tests/tab_separated_streams.cpp +++ b/dbms/src/Formats/tests/tab_separated_streams.cpp @@ -42,7 +42,7 @@ try RowInputStreamPtr row_input = std::make_shared(in_buf, sample, false, false, format_settings); RowOutputStreamPtr 
row_output = std::make_shared(out_buf, sample, false, false, format_settings); - BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, format_settings); + BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings); BlockOutputStreamFromRowOutputStream block_output(row_output, sample); copyData(block_input, block_output); diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 72e2ee5aeea..a584bd14a7d 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -65,6 +65,11 @@ if(USE_XXHASH) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) endif() +if (USE_H3) + target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) +endif() + if(USE_HYPERSCAN) target_link_libraries(clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR}) diff --git a/dbms/src/Functions/FunctionsRandom.cpp b/dbms/src/Functions/FunctionsRandom.cpp index ede8c332d18..19b2f08cdba 100644 --- a/dbms/src/Functions/FunctionsRandom.cpp +++ b/dbms/src/Functions/FunctionsRandom.cpp @@ -57,10 +57,10 @@ void RandImpl::execute(char * output, size_t size) for (const char * end = output + size; output < end; output += 16) { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); } /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. diff --git a/dbms/src/Functions/FunctionsStringSimilarity.cpp b/dbms/src/Functions/FunctionsStringSimilarity.cpp index 9a9dd01a972..d5632b136e4 100644 --- a/dbms/src/Functions/FunctionsStringSimilarity.cpp +++ b/dbms/src/Functions/FunctionsStringSimilarity.cpp @@ -271,11 +271,17 @@ struct NgramDistanceImpl { size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats, distance, nullptr); /// For !Symmetric version we should not use first_size. - res = distance * 1.f / std::max(Symmetric * first_size + second_size, size_t(1)); + if constexpr (Symmetric) + res = distance * 1.f / std::max(first_size + second_size, size_t(1)); + else + res = 1.f - distance * 1.f / std::max(second_size, size_t(1)); } else { - res = 1.f; + if constexpr (Symmetric) + res = 1.f; + else + res = 0.f; } } @@ -333,13 +339,19 @@ struct NgramDistanceImpl /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// Strings are too big, we are assuming they are not the same. This is done because of limiting number /// of bigrams added and not allocating too much memory. 
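The change repeated across the NgramDistance hunks around this point: the old code selected the denominator arithmetically, multiplying the haystack term by the bool template parameter Symmetric, and always fell back to 1.f for oversized inputs. The new code branches with if constexpr and, for the asymmetric (search-style) variant, flips the result from a distance into a similarity, so the oversized-input fallback becomes 0.f there. A runtime sketch of the new normalization (the real code keeps Symmetric as a compile-time template parameter):

#include <algorithm>
#include <cstddef>

float normalize(bool symmetric, size_t distance, size_t haystack_stats, size_t needle_stats)
{
    if (symmetric)
        return distance * 1.f / std::max(haystack_stats + needle_stats, size_t(1));
    return 1.f - distance * 1.f / std::max(needle_stats, size_t(1));
}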
- res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } prev_needle_offset = needle_offsets[i]; @@ -399,11 +411,11 @@ struct NgramDistanceImpl for (size_t j = 0; j < needle_stats_size; ++j) --common_stats[needle_ngram_storage[j]]; - res[i] = distance * 1.f / std::max(needle_stats_size, size_t(1)); + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { - res[i] = 1.f; + res[i] = 0.f; } prev_offset = needle_offsets[i]; @@ -446,12 +458,18 @@ struct NgramDistanceImpl distance, ngram_storage.get()); /// For !Symmetric version we should not use haystack_stats_size. - res[i] = distance * 1.f / std::max(Symmetric * haystack_stats_size + needle_stats_size, size_t(1)); + if constexpr (Symmetric) + res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1)); + else + res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1)); } else { /// if the strings are too big, we say they are completely not the same - res[i] = 1.f; + if constexpr (Symmetric) + res[i] = 1.f; + else + res[i] = 0.f; } distance = needle_stats_size; prev_offset = offsets[i]; diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 09cc3106719..41a49dfd908 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -91,8 +91,7 @@ struct ExtractBool struct ExtractRaw { - static constexpr size_t bytes_on_stack = 64; - using ExpectChars = PODArray, bytes_on_stack>>; + using ExpectChars = PODArrayWithStackMemory; static void extract(const UInt8 * pos, const UInt8 * end, ColumnString::Chars & res_data) { diff --git a/dbms/src/Functions/URL/domain.h b/dbms/src/Functions/URL/domain.h index 1a7b1df5291..141887d8e96 100644 --- a/dbms/src/Functions/URL/domain.h +++ b/dbms/src/Functions/URL/domain.h @@ -3,44 +3,117 @@ #include "protocol.h" #include #include - +#include namespace DB { +namespace +{ + +inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) +{ + if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) + return StringRef{}; + + auto after_dot = *(dot_pos + 1); + if (after_dot == ':' || after_dot == '/' || after_dot == '?' || after_dot == '#') + return StringRef{}; + + return StringRef(start_of_host, pos - start_of_host); +} + +} + /// Extracts host from given url. inline StringRef getURLHost(const char * data, size_t size) { Pos pos = data; Pos end = data + size; - if (end == (pos = find_first_symbols<'/'>(pos, end))) - return {}; - - if (pos != data) + if (*pos == '/' && *(pos + 1) == '/') { - StringRef scheme = getURLScheme(data, size); - Pos scheme_end = data + scheme.size; - - // Colon must follows after scheme. 
- if (pos - scheme_end != 1 || *scheme_end != ':') - return {}; + pos += 2; + } + else + { + Pos scheme_end = data + std::min(size, 16UL); + for (++pos; pos < scheme_end; ++pos) + { + if (!isAlphaNumericASCII(*pos)) + { + switch (*pos) + { + case '.': + case '-': + case '+': + break; + case ' ': /// restricted symbols + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + default: + goto exloop; + } + } + } +exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos + 2) == '/') + pos += 3; + else + pos = data; } - if (end - pos < 2 || *(pos) != '/' || *(pos + 1) != '/') - return {}; - pos += 2; - - const char * start_of_host = pos; + Pos dot_pos = nullptr; + auto start_of_host = pos; for (; pos < end; ++pos) { - if (*pos == '@') - start_of_host = pos + 1; - else if (*pos == ':' || *pos == '/' || *pos == '?' || *pos == '#') + switch (*pos) + { + case '.': + dot_pos = pos; break; + case ':': /// end symbols + case '/': + case '?': + case '#': + return checkAndReturnHost(pos, dot_pos, start_of_host); + case '@': /// myemail@gmail.com + start_of_host = pos + 1; + break; + case ' ': /// restricted symbols in whole URL + case '\t': + case '<': + case '>': + case '%': + case '{': + case '}': + case '|': + case '\\': + case '^': + case '~': + case '[': + case ']': + case ';': + case '=': + case '&': + return StringRef{}; + } } - return (pos == start_of_host) ? StringRef{} : StringRef(start_of_host, pos - start_of_host); + return checkAndReturnHost(pos, dot_pos, start_of_host); } template diff --git a/dbms/src/Functions/config_functions.h.in b/dbms/src/Functions/config_functions.h.in index a6b5e9790c0..7d395741b78 100644 --- a/dbms/src/Functions/config_functions.h.in +++ b/dbms/src/Functions/config_functions.h.in @@ -8,3 +8,4 @@ #cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON +#cmakedefine01 USE_H3 diff --git a/dbms/src/Functions/geoToH3.cpp b/dbms/src/Functions/geoToH3.cpp new file mode 100644 index 00000000000..6d3a7197ee0 --- /dev/null +++ b/dbms/src/Functions/geoToH3.cpp @@ -0,0 +1,108 @@ +#include "config_functions.h" +#if USE_H3 +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +extern "C" +{ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdocumentation" +#endif + +#include + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +/// Implements the function geoToH3 which takes 3 arguments (longitude, latitude and h3 resolution) +/// and returns the H3 index of this point +class FunctionGeoToH3 : public IFunction +{ +public: + static constexpr auto name = "geoToH3"; + + static FunctionPtr create(const Context &) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[1].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(2) + " of function " + getName() + ". Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arg = arguments[2].get(); + if (!WhichDataType(arg).isUInt8()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(3) + " of function " + getName() + ". Must be UInt8", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto col_lon = block.getByPosition(arguments[0]).column.get(); + const auto col_lat = block.getByPosition(arguments[1]).column.get(); + const auto col_res = block.getByPosition(arguments[2]).column.get(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (const auto row : ext::range(0, input_rows_count)) + { + const double lon = col_lon->getFloat64(row); + const double lat = col_lat->getFloat64(row); + const UInt8 res = col_res->getUInt(row); + + GeoCoord coord; + coord.lon = H3_EXPORT(degsToRads)(lon); + coord.lat = H3_EXPORT(degsToRads)(lat); + + H3Index hindex = H3_EXPORT(geoToH3)(&coord, res); + + dst_data[row] = hindex; + } + + block.getByPosition(result).column = std::move(dst); + } +}; + + +void registerFunctionGeoToH3(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} +#endif diff --git a/dbms/src/Functions/geohashDecode.cpp b/dbms/src/Functions/geohashDecode.cpp new file mode 100644 index 00000000000..9774ecdee40 --- /dev/null +++ b/dbms/src/Functions/geohashDecode.cpp @@ -0,0 +1,99 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +// geohashDecode(string) => (lon float64, lat float64) +class FunctionGeohashDecode : public IFunction +{ +public: + static constexpr auto name = "geohashDecode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); + + return std::make_shared( + DataTypes{std::make_shared(), std::make_shared()}, + Strings{"longitude", "latitude"}); + } + + template + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) + { + const auto * encoded = checkAndGetColumn(encoded_column); + if (!encoded) + return false; + + const size_t count = encoded->size(); + + auto latitude = ColumnFloat64::create(count); + auto longitude = ColumnFloat64::create(count); + + ColumnFloat64::Container & lon_data = longitude->getData(); + ColumnFloat64::Container & lat_data = latitude->getData(); + + for (size_t i = 0; i < count; ++i) + { + StringRef encoded_string = encoded->getDataAt(i); + GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); + } + + MutableColumns result; + result.emplace_back(std::move(longitude)); + result.emplace_back(std::move(latitude)); + result_column = 
ColumnTuple::create(std::move(result)); + + return true; + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(encoded, res_column) || + tryExecute(encoded, res_column)) + return; + + throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() + + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionGeohashDecode(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/geohashEncode.cpp b/dbms/src/Functions/geohashEncode.cpp new file mode 100644 index 00000000000..9f4ccddd0f4 --- /dev/null +++ b/dbms/src/Functions/geohashEncode.cpp @@ -0,0 +1,136 @@ +#include +#include +#include + +#include +#include + +#include + +#define GEOHASH_MAX_TEXT_LENGTH 16 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; +} + +// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string +class FunctionGeohashEncode : public IFunction +{ +public: + static constexpr auto name = "geohashEncode"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + validateArgumentType(*this, arguments, 0, isFloat, "float"); + validateArgumentType(*this, arguments, 1, isFloat, "float"); + if (arguments.size() == 3) + { + validateArgumentType(*this, arguments, 2, isInteger, "integer"); + } + if (arguments.size() > 3) + { + throw Exception("Too many arguments for function " + getName() + + " expected at most 3", + ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + } + + return std::make_shared(); + } + + template + bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + + const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + + result 
= std::move(col_str); + + return true; + + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override + { + const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); + const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); + + const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, + arguments.size() == 3 ? block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + + ColumnPtr & res_column = block.getByPosition(result).column; + + if (tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column) || + tryExecute(longitude, latitude, precision_value, res_column)) + return; + + std::string arguments_description; + for (size_t i = 0; i < arguments.size(); ++i) + { + if (i != 0) + arguments_description += ", "; + arguments_description += block.getByPosition(arguments[i]).column->getName(); + } + + throw Exception("Unsupported argument types: " + arguments_description + + + " for function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } +}; + + +void registerFunctionGeohashEncode(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/greatCircleDistance.cpp b/dbms/src/Functions/greatCircleDistance.cpp new file mode 100644 index 00000000000..593334c6cfb --- /dev/null +++ b/dbms/src/Functions/greatCircleDistance.cpp @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEGREES_IN_RADIANS (M_PI / 180.0) +#define EARTH_RADIUS_IN_METERS 6372797.560856 + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; +} + +static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } + +/** + * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. + * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. + * Throws exception when one or several input values are not within reasonable bounds. + * Latitude must be in [-90, 90], longitude must be in [-180, 180] + * + */ +class FunctionGreatCircleDistance : public IFunction +{ +public: + + static constexpr auto name = "greatCircleDistance"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + +private: + + enum class instr_type : uint8_t + { + get_float_64, + get_const_float_64 + }; + + using instr_t = std::pair; + using instrs_t = std::array; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 4; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto arg = arguments[arg_idx].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); + } + + instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) + { + instrs_t result; + out_const = true; + + for (const auto arg_idx : ext::range(0, arguments.size())) + { + const auto column = block.getByPosition(arguments[arg_idx]).column.get(); + + if (const auto col = checkAndGetColumn>(column)) + { + out_const = false; + result[arg_idx] = instr_t{instr_type::get_float_64, col}; + } + else if (const auto col_const = checkAndGetColumnConst>(column)) + { + result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; + } + else + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + + return result; + } + + /// https://en.wikipedia.org/wiki/Great-circle_distance + Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) + { + if (lon1Deg < -180 || lon1Deg > 180 || + lon2Deg < -180 || lon2Deg > 180 || + lat1Deg < -90 || lat1Deg > 90 || + lat2Deg < -90 || lat2Deg > 90) + { + throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + Float64 lon1Rad = degToRad(lon1Deg); + Float64 lat1Rad = degToRad(lat1Deg); + Float64 lon2Rad = degToRad(lon2Deg); + Float64 lat2Rad = degToRad(lat2Deg); + Float64 u = sin((lat2Rad - lat1Rad) / 2); + Float64 v = sin((lon2Rad - lon1Rad) / 2); + return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); + } + + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const auto size = input_rows_count; + + bool result_is_const{}; + auto instrs = getInstructions(block, arguments, result_is_const); + + if (result_is_const) + { + const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); + const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); + const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); + const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); + + Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); + block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); + } + else + { + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(size); + Float64 vals[instrs.size()]; + for (const auto row : ext::range(0, size)) + { + for (const auto idx : ext::range(0, instrs.size())) + { + if (instr_type::get_float_64 == instrs[idx].first) + vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; + else if (instr_type::get_const_float_64 == instrs[idx].first) + vals[idx] = static_cast(instrs[idx].second)->getValue(); + else + throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; + } + dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); + } + block.getByPosition(result).column = std::move(dst); + } + } +}; + + +void registerFunctionGreatCircleDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/dbms/src/Functions/FunctionsGeo.h b/dbms/src/Functions/pointInEllipses.cpp similarity index 54% rename from dbms/src/Functions/FunctionsGeo.h rename to 
dbms/src/Functions/pointInEllipses.cpp index 1f351633dd7..2958d6171f1 100644 --- a/dbms/src/Functions/FunctionsGeo.h +++ b/dbms/src/Functions/pointInEllipses.cpp @@ -1,17 +1,11 @@ -#pragma once - #include #include #include #include #include #include +#include #include -#include -#include - -#define DEGREES_IN_RADIANS (M_PI / 180.0) -#define EARTH_RADIUS_IN_METERS 6372797.560856 namespace DB @@ -19,148 +13,11 @@ namespace DB namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; - extern const int LOGICAL_ERROR; } -static inline Float64 degToRad(Float64 angle) { return angle * DEGREES_IN_RADIANS; } -static inline Float64 radToDeg(Float64 angle) { return angle / DEGREES_IN_RADIANS; } - -/** - * The function calculates distance in meters between two points on Earth specified by longitude and latitude in degrees. - * The function uses great circle distance formula https://en.wikipedia.org/wiki/Great-circle_distance. - * Throws exception when one or several input values are not within reasonable bounds. - * Latitude must be in [-90, 90], longitude must be [-180, 180] - * - */ -class FunctionGreatCircleDistance : public IFunction -{ -public: - - static constexpr auto name = "greatCircleDistance"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - -private: - - enum class instr_type : uint8_t - { - get_float_64, - get_const_float_64 - }; - - using instr_t = std::pair; - using instrs_t = std::array; - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 4; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto arg = arguments[arg_idx].get(); - if (!WhichDataType(arg).isFloat64()) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". 
Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - return std::make_shared(); - } - - instrs_t getInstructions(const Block & block, const ColumnNumbers & arguments, bool & out_const) - { - instrs_t result; - out_const = true; - - for (const auto arg_idx : ext::range(0, arguments.size())) - { - const auto column = block.getByPosition(arguments[arg_idx]).column.get(); - - if (const auto col = checkAndGetColumn>(column)) - { - out_const = false; - result[arg_idx] = instr_t{instr_type::get_float_64, col}; - } - else if (const auto col_const = checkAndGetColumnConst>(column)) - { - result[arg_idx] = instr_t{instr_type::get_const_float_64, col_const}; - } - else - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } - - return result; - } - - /// https://en.wikipedia.org/wiki/Great-circle_distance - Float64 greatCircleDistance(Float64 lon1Deg, Float64 lat1Deg, Float64 lon2Deg, Float64 lat2Deg) - { - if (lon1Deg < -180 || lon1Deg > 180 || - lon2Deg < -180 || lon2Deg > 180 || - lat1Deg < -90 || lat1Deg > 90 || - lat2Deg < -90 || lat2Deg > 90) - { - throw Exception("Arguments values out of bounds for function " + getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - } - - Float64 lon1Rad = degToRad(lon1Deg); - Float64 lat1Rad = degToRad(lat1Deg); - Float64 lon2Rad = degToRad(lon2Deg); - Float64 lat2Rad = degToRad(lat2Deg); - Float64 u = sin((lat2Rad - lat1Rad) / 2); - Float64 v = sin((lon2Rad - lon1Rad) / 2); - return 2.0 * EARTH_RADIUS_IN_METERS * asin(sqrt(u * u + cos(lat1Rad) * cos(lat2Rad) * v * v)); - } - - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - const auto size = input_rows_count; - - bool result_is_const{}; - auto instrs = getInstructions(block, arguments, result_is_const); - - if (result_is_const) - { - const auto & colLon1 = static_cast(block.getByPosition(arguments[0]).column.get())->getValue(); - const auto & colLat1 = static_cast(block.getByPosition(arguments[1]).column.get())->getValue(); - const auto & colLon2 = static_cast(block.getByPosition(arguments[2]).column.get())->getValue(); - const auto & colLat2 = static_cast(block.getByPosition(arguments[3]).column.get())->getValue(); - - Float64 res = greatCircleDistance(colLon1, colLat1, colLon2, colLat2); - block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(size, res); - } - else - { - auto dst = ColumnVector::create(); - auto & dst_data = dst->getData(); - dst_data.resize(size); - Float64 vals[instrs.size()]; - for (const auto row : ext::range(0, size)) - { - for (const auto idx : ext::range(0, instrs.size())) - { - if (instr_type::get_float_64 == instrs[idx].first) - vals[idx] = static_cast *>(instrs[idx].second)->getData()[row]; - else if (instr_type::get_const_float_64 == instrs[idx].first) - vals[idx] = static_cast(instrs[idx].second)->getValue(); - else - throw Exception{"Unknown instruction type in implementation of greatCircleDistance function", ErrorCodes::LOGICAL_ERROR}; - } - dst_data[row] = greatCircleDistance(vals[0], vals[1], vals[2], vals[3]); - } - block.getByPosition(result).column = std::move(dst); - } - } -}; - - /** * The function checks if a point is in one of ellipses in set. * The number of arguments must be 2 + 4*N where N is the number of ellipses. 
@@ -177,7 +34,6 @@ private: class FunctionPointInEllipses : public IFunction { public: - static constexpr auto name = "pointInEllipses"; static FunctionPtr create(const Context &) { return std::make_shared(); } @@ -330,6 +186,10 @@ private: } }; + +void registerFunctionPointInEllipses(FunctionFactory & factory) +{ + factory.registerFunction(); } -#undef DEGREES_IN_RADIANS +} diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/pointInPolygon.cpp similarity index 55% rename from dbms/src/Functions/FunctionsGeo.cpp rename to dbms/src/Functions/pointInPolygon.cpp index 05ed8db2969..fc94be6c343 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/pointInPolygon.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,6 +15,7 @@ #include #include #include +#include #include #include @@ -37,6 +37,7 @@ namespace ErrorCodes extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } namespace FunctionPointInPolygonDetail @@ -251,185 +252,6 @@ private: }; -const size_t GEOHASH_MAX_TEXT_LENGTH = 16; - -// geohashEncode(lon float32/64, lat float32/64, length UInt8) => string -class FunctionGeohashEncode : public IFunction -{ -public: - static constexpr auto name = "geohashEncode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isFloat, "float"); - validateArgumentType(*this, arguments, 1, isFloat, "float"); - if (arguments.size() == 3) - { - validateArgumentType(*this, arguments, 2, isInteger, "integer"); - } - if (arguments.size() > 3) - { - throw Exception("Too many arguments for function " + getName() + - " expected at most 3", - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); - } - - return std::make_shared(); - } - - template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = GeoUtils::geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); - - result = std::move(col_str); - - return true; - - } - 
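Note: this diff splits the monolithic FunctionsGeo into one translation unit per function, each exposing its own registration hook (registerFunctionPointInEllipses above, registerFunctionPointInPolygon below), all collected by the new registerFunctionsGeo.cpp. A sketch of the pattern with a simplified factory and a hypothetical function name — the real code registers IFunction subclasses with DB::FunctionFactory:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    /// Simplified stand-in for DB::FunctionFactory: maps a function name to a creator.
    struct Factory
    {
        std::map<std::string, std::function<std::shared_ptr<void>()>> creators;

        template <typename F>
        void registerFunction() { creators[F::name] = [] { return std::make_shared<F>(); }; }
    };

    /// Hypothetical function class; pointInEllipses / pointInPolygon follow this shape.
    struct FunctionSomeGeo
    {
        static constexpr auto name = "someGeo";
    };

    /// The per-file hook this PR adds for each geo function.
    void registerFunctionSomeGeo(Factory & factory)
    {
        factory.registerFunction<FunctionSomeGeo>();
    }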
- void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * longitude = block.getByPosition(arguments[0]).column.get(); - const IColumn * latitude = block.getByPosition(arguments[1]).column.get(); - - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? block.getByPosition(arguments[2]).column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return; - - const char sep[] = ", "; - std::string arguments_description = ""; - for (size_t i = 0; i < arguments.size(); ++i) - { - arguments_description += block.getByPosition(arguments[i]).column->getName() + sep; - } - if (arguments_description.size() > sizeof(sep)) - { - arguments_description.erase(arguments_description.size() - sizeof(sep) - 1); - } - - throw Exception("Unsupported argument types: " + arguments_description + - + " for function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; - -// geohashDecode(string) => (lon float64, lat float64) -class FunctionGeohashDecode : public IFunction -{ -public: - static constexpr auto name = "geohashDecode"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override { return 1; } - bool useDefaultImplementationForConstants() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - validateArgumentType(*this, arguments, 0, isStringOrFixedString, "string or fixed string"); - - return std::make_shared( - DataTypes{std::make_shared(), std::make_shared()}, - Strings{"longitude", "latitude"}); - } - - template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) - { - const auto * encoded = checkAndGetColumn(encoded_column); - if (!encoded) - return false; - - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); - - ColumnFloat64::Container & lon_data = longitude->getData(); - ColumnFloat64::Container & lat_data = latitude->getData(); - - for (size_t i = 0; i < count; ++i) - { - StringRef encoded_string = encoded->getDataAt(i); - GeoUtils::geohashDecode(encoded_string.data, encoded_string.size, &lon_data[i], &lat_data[i]); - } - - MutableColumns result; - result.emplace_back(std::move(longitude)); - result.emplace_back(std::move(latitude)); - result_column = ColumnTuple::create(std::move(result)); - - return true; - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override - { - const IColumn * encoded = block.getByPosition(arguments[0]).column.get(); - ColumnPtr & res_column = block.getByPosition(result).column; - - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) - return; - - throw Exception("Unsupported argument type:" + block.getByPosition(arguments[0]).column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); - } -}; template using Point = boost::geometry::model::d2::point_xy; @@ -440,13 +262,9 @@ using PointInPolygonWithGrid = 
GeoUtils::PointInPolygonWithGrid; template <> const char * FunctionPointInPolygon::name = "pointInPolygon"; -void registerFunctionsGeo(FunctionFactory & factory) +void registerFunctionPointInPolygon(FunctionFactory & factory) { - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction(); - factory.registerFunction(); } + } diff --git a/dbms/src/Functions/registerFunctionsGeo.cpp b/dbms/src/Functions/registerFunctionsGeo.cpp new file mode 100644 index 00000000000..0f436811874 --- /dev/null +++ b/dbms/src/Functions/registerFunctionsGeo.cpp @@ -0,0 +1,32 @@ +#include "config_functions.h" + +namespace DB +{ + +class FunctionFactory; + +void registerFunctionGreatCircleDistance(FunctionFactory & factory); +void registerFunctionPointInEllipses(FunctionFactory & factory); +void registerFunctionPointInPolygon(FunctionFactory & factory); +void registerFunctionGeohashEncode(FunctionFactory & factory); +void registerFunctionGeohashDecode(FunctionFactory & factory); + +#if USE_H3 +void registerFunctionGeoToH3(FunctionFactory &); +#endif + +void registerFunctionsGeo(FunctionFactory & factory) +{ + registerFunctionGreatCircleDistance(factory); + registerFunctionPointInEllipses(factory); + registerFunctionPointInPolygon(factory); + registerFunctionGeohashEncode(factory); + registerFunctionGeohashDecode(factory); + +#if USE_H3 + registerFunctionGeoToH3(factory); +#endif +} + +} + diff --git a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp index 765f1ea9478..d648fd114f4 100644 --- a/dbms/src/Interpreters/BloomFilter.cpp +++ b/dbms/src/Interpreters/BloomFilter.cpp @@ -1,5 +1,4 @@ #include - #include @@ -9,14 +8,13 @@ namespace DB static constexpr UInt64 SEED_GEN_A = 845897321; static constexpr UInt64 SEED_GEN_B = 217728422; - -StringBloomFilter::StringBloomFilter(size_t size_, size_t hashes_, size_t seed_) +BloomFilter::BloomFilter(size_t size_, size_t hashes_, size_t seed_) : size(size_), hashes(hashes_), seed(seed_), words((size + sizeof(UnderType) - 1) / sizeof(UnderType)), filter(words, 0) {} -StringBloomFilter::StringBloomFilter(const StringBloomFilter & bloom_filter) +BloomFilter::BloomFilter(const BloomFilter & bloom_filter) : size(bloom_filter.size), hashes(bloom_filter.hashes), seed(bloom_filter.seed), words(bloom_filter.words), filter(bloom_filter.filter) {} -bool StringBloomFilter::find(const char * data, size_t len) +bool BloomFilter::find(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -30,7 +28,7 @@ bool StringBloomFilter::find(const char * data, size_t len) return true; } -void StringBloomFilter::add(const char * data, size_t len) +void BloomFilter::add(const char * data, size_t len) { size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed); size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B); @@ -42,12 +40,12 @@ void StringBloomFilter::add(const char * data, size_t len) } } -void StringBloomFilter::clear() +void BloomFilter::clear() { filter.assign(words, 0); } -bool StringBloomFilter::contains(const StringBloomFilter & bf) +bool BloomFilter::contains(const BloomFilter & bf) { for (size_t i = 0; i < words; ++i) { @@ -57,7 +55,7 @@ bool StringBloomFilter::contains(const StringBloomFilter & bf) return true; } -UInt64 StringBloomFilter::isEmpty() const +UInt64 BloomFilter::isEmpty() const { for (size_t i 
= 0; i < words; ++i) if (filter[i] != 0) @@ -65,7 +63,7 @@ UInt64 StringBloomFilter::isEmpty() const return true; } -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) +bool operator== (const BloomFilter & a, const BloomFilter & b) { for (size_t i = 0; i < a.words; ++i) if (a.filter[i] != b.filter[i]) @@ -73,4 +71,16 @@ bool operator== (const StringBloomFilter & a, const StringBloomFilter & b) return true; } +void BloomFilter::addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); + filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType)))); +} + +bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed) +{ + size_t pos = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(hash, hash_seed)) % (8 * size); + return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))))); +} + } diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h index 1825dbec4bd..19469834c94 100644 --- a/dbms/src/Interpreters/BloomFilter.h +++ b/dbms/src/Interpreters/BloomFilter.h @@ -1,15 +1,17 @@ #pragma once -#include #include - +#include +#include +#include +#include namespace DB { -/// Bloom filter for strings. -class StringBloomFilter +class BloomFilter { + public: using UnderType = UInt64; using Container = std::vector; @@ -17,16 +19,19 @@ public: /// size -- size of filter in bytes. /// hashes -- number of used hash functions. /// seed -- random seed for hash functions generation. - StringBloomFilter(size_t size_, size_t hashes_, size_t seed_); - StringBloomFilter(const StringBloomFilter & bloom_filter); + BloomFilter(size_t size_, size_t hashes_, size_t seed_); + BloomFilter(const BloomFilter & bloom_filter); bool find(const char * data, size_t len); void add(const char * data, size_t len); void clear(); + void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); + bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed); + /// Checks if this contains everything from another bloom filter. /// Bloom filters must have equal size and seed. - bool contains(const StringBloomFilter & bf); + bool contains(const BloomFilter & bf); const Container & getFilter() const { return filter; } Container & getFilter() { return filter; } @@ -34,7 +39,7 @@ public: /// For debug. 
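Note: the new addHashWithSeed/findHashWithSeed above reduce a (hash, seed) pair to a single bit position inside the UInt64-word filter. A self-contained sketch of just that bit addressing; the mix function below is only a placeholder for CityHash_v1_0_2::Hash128to64(uint128(hash, seed)), which is what the real code uses:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using UnderType = uint64_t;

    /// Placeholder mix, standing in for CityHash_v1_0_2::Hash128to64.
    static uint64_t mix(uint64_t a, uint64_t b)
    {
        return (a ^ (b + 0x9E3779B97F4A7C15ULL)) * 0xC2B2AE3D27D4EB4FULL;
    }

    struct BitBloom
    {
        size_t size;                        /// filter size in bytes, as in BloomFilter
        std::vector<UnderType> filter;

        explicit BitBloom(size_t size_)
            : size(size_), filter((size + sizeof(UnderType) - 1) / sizeof(UnderType), 0) {}

        void addHashWithSeed(uint64_t hash, uint64_t seed)
        {
            size_t pos = mix(hash, seed) % (8 * size);   /// bit index in [0, 8 * size)
            filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
        }

        bool findHashWithSeed(uint64_t hash, uint64_t seed) const
        {
            size_t pos = mix(hash, seed) % (8 * size);
            return filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType))));
        }
    };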
UInt64 isEmpty() const; - friend bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); + friend bool operator== (const BloomFilter & a, const BloomFilter & b); private: size_t size; @@ -44,7 +49,8 @@ private: Container filter; }; +using BloomFilterPtr = std::shared_ptr; -bool operator== (const StringBloomFilter & a, const StringBloomFilter & b); +bool operator== (const BloomFilter & a, const BloomFilter & b); } diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h new file mode 100644 index 00000000000..a94bc8687eb --- /dev/null +++ b/dbms/src/Interpreters/BloomFilterHash.h @@ -0,0 +1,207 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +struct BloomFilterHash +{ + static constexpr UInt64 bf_hash_seed[15] = { + 13635471485423070496ULL, 10336109063487487899ULL, 17779957404565211594ULL, 8988612159822229247ULL, 4954614162757618085ULL, + 12980113590177089081ULL, 9263883436177860930ULL, 3656772712723269762ULL, 10362091744962961274ULL, 7582936617938287249ULL, + 15033938188484401405ULL, 18286745649494826751ULL, 6852245486148412312ULL, 8886056245089344681ULL, 10151472371158292780ULL + }; + + static ColumnPtr hashWithField(const IDataType * data_type, const Field & field) + { + WhichDataType which(data_type); + + if (which.isUInt() || which.isDateOrDateTime()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet())), 1); + else if (which.isInt() || which.isEnum()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isFloat32() || which.isFloat64()) + return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast(field.safeGet()))), 1); + else if (which.isString() || which.isFixedString()) + { + const auto & value = field.safeGet(); + return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1); + } + else + throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit) + { + auto index_column = ColumnUInt64::create(limit); + ColumnUInt64::Container & index_column_vec = index_column->getData(); + getAnyTypeHash(&*data_type, &*column, index_column_vec, pos); + return index_column; + } + + template + static void getAnyTypeHash(const IDataType * data_type, const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + WhichDataType which(data_type); + + if (which.isUInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isUInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isInt8()) getNumberTypeHash(column, vec, pos); + else if (which.isInt16()) getNumberTypeHash(column, vec, pos); + else if (which.isInt32()) getNumberTypeHash(column, vec, pos); + else if (which.isInt64()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum8()) getNumberTypeHash(column, vec, pos); + else if (which.isEnum16()) getNumberTypeHash(column, vec, pos); + else if (which.isDate()) getNumberTypeHash(column, vec, pos); + else if (which.isDateTime()) getNumberTypeHash(column, vec, pos); + else if (which.isFloat32()) 
getNumberTypeHash(column, vec, pos); + else if (which.isFloat64()) getNumberTypeHash(column, vec, pos); + else if (which.isString()) getStringTypeHash(column, vec, pos); + else if (which.isFixedString()) getStringTypeHash(column, vec, pos); + else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); + } + + template + static void getNumberTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + const auto * index_column = typeid_cast *>(column); + + if (unlikely(!index_column)) + throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & vec_from = index_column->getData(); + + /// Because we're missing the precision of float in the Field.h + /// to be consistent, we need to convert Float32 to Float64 processing, also see: BloomFilterHash::hashWithField + if constexpr (std::is_same_v, ColumnFloat32>) + { + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(Float64(vec_from[index + pos]))); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + else + { + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 hash = intHash64(ext::bit_cast(vec_from[index + pos])); + + if constexpr (is_first) + vec[index] = hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], hash)); + } + } + } + + template + static void getStringTypeHash(const IColumn * column, ColumnUInt64::Container & vec, size_t pos) + { + if (const auto * index_column = typeid_cast(column)) + { + const ColumnString::Chars & data = index_column->getChars(); + const ColumnString::Offsets & offsets = index_column->getOffsets(); + + ColumnString::Offset current_offset = pos; + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64( + reinterpret_cast(&data[current_offset]), offsets[index + pos] - current_offset - 1); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + + current_offset = offsets[index + pos]; + } + } + else if (const auto * fixed_string_index_column = typeid_cast(column)) + { + size_t fixed_len = fixed_string_index_column->getN(); + const auto & data = fixed_string_index_column->getChars(); + + for (size_t index = 0, size = vec.size(); index < size; ++index) + { + UInt64 city_hash = CityHash_v1_0_2::CityHash64(reinterpret_cast(&data[(index + pos) * fixed_len]), fixed_len); + + if constexpr (is_first) + vec[index] = city_hash; + else + vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); + } + } + else + throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + } + + static std::pair calculationBestPractices(double max_conflict_probability) + { + static const size_t MAX_BITS_PER_ROW = 20; + static const size_t MAX_HASH_FUNCTION_COUNT = 15; + + /// For the smallest index per level in probability_lookup_table + static const size_t min_probability_index_each_bits[] = {0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10, 11, 12, 12, 13, 14}; + + static const long double probability_lookup_table[MAX_BITS_PER_ROW + 1][MAX_HASH_FUNCTION_COUNT] = + { + {1.0}, /// dummy, 0 bits per row + 
{1.0, 1.0}, + {1.0, 0.393, 0.400}, + {1.0, 0.283, 0.237, 0.253}, + {1.0, 0.221, 0.155, 0.147, 0.160}, + {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 + {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638}, + {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, + {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, + {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, + {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 + {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, + {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, + {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, + {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, + {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 + {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, + {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, + {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, + {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, + {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 + }; + + for (size_t bits_per_row = 1; bits_per_row < MAX_BITS_PER_ROW; ++bits_per_row) + { + if (probability_lookup_table[bits_per_row][min_probability_index_each_bits[bits_per_row]] <= max_conflict_probability) + { + size_t max_size_of_hash_functions = min_probability_index_each_bits[bits_per_row]; + for (size_t size_of_hash_functions = max_size_of_hash_functions; size_of_hash_functions > 0; --size_of_hash_functions) + if (probability_lookup_table[bits_per_row][size_of_hash_functions] > max_conflict_probability) + return std::pair(bits_per_row, size_of_hash_functions + 1); + } + } + + return std::pair(MAX_BITS_PER_ROW - 1, min_probability_index_each_bits[MAX_BITS_PER_ROW - 1]); + } +}; + +} diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 904facc1ece..6a9b347016f 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -245,15 +245,12 @@ struct ContextShared return; shutdown_called = true; - { - std::lock_guard lock(mutex); + /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. + * Note that part changes at shutdown won't be logged to part log. + */ - /** After this point, system logs will shutdown their threads and no longer write any data. - * It will prevent recreation of system tables at shutdown. - * Note that part changes at shutdown won't be logged to part log. - */ - system_logs.reset(); - } + if (system_logs) + system_logs->shutdown(); /** At this point, some tables may have threads that block our mutex. * To shutdown them correctly, we will copy the current list of tables, @@ -281,6 +278,7 @@ struct ContextShared /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). /// TODO: Get rid of this. 
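Note: the probability_lookup_table driving calculationBestPractices above tabulates false-positive rates per (bits-per-row, hash-count) pair. Its entries agree with the standard closed-form estimate p ≈ (1 - e^(-k/b))^k for k hash functions and b bits per element, so the table scan can be sanity-checked analytically; a small sketch (not part of this PR):

    #include <cmath>
    #include <cstdio>

    /// Closed-form bloom filter false-positive estimate for b bits/element, k hashes.
    static double falsePositiveProbability(double bits_per_row, double hashes)
    {
        return std::pow(1.0 - std::exp(-hashes / bits_per_row), hashes);
    }

    int main()
    {
        /// ~0.0082 for 10 bits and 7 hashes, cf. the 0.00819 entry in the table's row 10.
        std::printf("%.5f\n", falsePositiveProbability(10, 7));
    }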
+ system_logs.reset(); embedded_dictionaries.reset(); external_dictionaries.reset(); external_models.reset(); @@ -1849,6 +1847,25 @@ Context::SampleBlockCache & Context::getSampleBlockCache() const } +bool Context::hasQueryParameters() const +{ + return !query_parameters.empty(); +} + + +const NameToNameMap & Context::getQueryParameters() const +{ + return query_parameters; +} + + +void Context::setQueryParameter(const String & name, const String & value) +{ + if (!query_parameters.emplace(name, value).second) + throw Exception("Duplicate name " + backQuote(name) + " of query parameter", ErrorCodes::BAD_ARGUMENTS); +} + + #if USE_EMBEDDED_COMPILER std::shared_ptr Context::getCompiledExpressionCache() const diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 50f9dd1ca36..443ea3bdb55 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -144,6 +144,9 @@ private: using DatabasePtr = std::shared_ptr; using Databases = std::map>; + NameToNameMap query_parameters; /// Dictionary with query parameters for prepared statements. + /// (key=name, value) + IHostContextPtr host_context; /// Arbitrary object that may used to attach some host specific information to query context, /// when using ClickHouse as a library in some project. For example, it may contain host /// logger, some query identification information, profiling guards, etc. This field is @@ -464,6 +467,11 @@ public: SampleBlockCache & getSampleBlockCache() const; + /// Query parameters for prepared statements. + bool hasQueryParameters() const; + const NameToNameMap & getQueryParameters() const; + void setQueryParameter(const String & name, const String & value); + #if USE_EMBEDDED_COMPILER std::shared_ptr getCompiledExpressionCache() const; void setCompiledExpressionCache(size_t cache_size); diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 88049565aeb..59f7f46be70 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -9,10 +9,12 @@ #include #include #include +#include #include #include + namespace DB { @@ -23,22 +25,6 @@ namespace ErrorCodes } -static ASTPtr addTypeConversion(std::unique_ptr && ast, const String & type_name) -{ - auto func = std::make_shared(); - ASTPtr res = func; - func->alias = ast->alias; - func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; - ast->alias.clear(); - func->name = "CAST"; - auto exp_list = std::make_shared(); - func->arguments = exp_list; - func->children.push_back(func->arguments); - exp_list->children.emplace_back(ast.release()); - exp_list->children.emplace_back(std::make_shared(type_name)); - return res; -} - bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { /// Processed @@ -110,7 +96,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; - ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); + ast = addTypeConversionToAST(std::move(lit), block.safeGetByPosition(0).type->getName()); } else { @@ -125,7 +111,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) { - 
exp_list->children[i] = addTypeConversion( + exp_list->children[i] = addTypeConversionToAST( std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 658f17b531d..018565e0a2c 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -343,25 +343,6 @@ public: enable_async_loading = enable; } - /// Returns the names of all the objects in the configuration (loaded or not). - std::vector getNames() const - { - std::lock_guard lock{mutex}; - std::vector all_names; - for (const auto & name_and_info : infos) - { - const String & name = name_and_info.first; - all_names.emplace_back(name); - } - return all_names; - } - - size_t getNumberOfNames() const - { - std::lock_guard lock{mutex}; - return infos.size(); - } - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -419,6 +400,15 @@ public: return count; } + bool hasCurrentlyLoadedObjects() const + { + std::lock_guard lock{mutex}; + for (auto & [name, info] : infos) + if (info.loaded()) + return true; + return false; + } + /// Starts loading of a specified object. void load(const String & name) { @@ -1008,14 +998,9 @@ void ExternalLoader::enablePeriodicUpdates(bool enable_, const ExternalLoaderUpd periodic_updater->enable(enable_, settings_); } -std::vector ExternalLoader::getNames() const +bool ExternalLoader::hasCurrentlyLoadedObjects() const { - return loading_dispatcher->getNames(); -} - -size_t ExternalLoader::getNumberOfNames() const -{ - return loading_dispatcher->getNumberOfNames(); + return loading_dispatcher->hasCurrentlyLoadedObjects(); } ExternalLoader::Status ExternalLoader::getCurrentStatus(const String & name) const @@ -1053,6 +1038,11 @@ size_t ExternalLoader::getNumberOfCurrentlyLoadedObjects() const return loading_dispatcher->getNumberOfCurrentlyLoadedObjects(); } +void ExternalLoader::load(const String & name) const +{ + loading_dispatcher->load(name); +} + void ExternalLoader::load(const String & name, LoadablePtr & loaded_object, Duration timeout) const { loading_dispatcher->load(name, loaded_object, timeout); @@ -1073,6 +1063,11 @@ void ExternalLoader::loadStrict(const String & name, LoadResult & load_result) c loading_dispatcher->loadStrict(name, load_result); } +void ExternalLoader::load(const FilterByNameFunction & filter_by_name) const +{ + loading_dispatcher->load(filter_by_name); +} + void ExternalLoader::load(const FilterByNameFunction & filter_by_name, Loadables & loaded_objects, Duration timeout) const { if (filter_by_name) @@ -1089,6 +1084,11 @@ void ExternalLoader::load(const FilterByNameFunction & filter_by_name, LoadResul loading_dispatcher->load(load_results, timeout); } +void ExternalLoader::load() const +{ + loading_dispatcher->load(); +} + void ExternalLoader::load(Loadables & loaded_objects, Duration timeout) const { return loading_dispatcher->load(loaded_objects, timeout); diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 8fe565c7667..da999bfe21a 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -107,10 +107,6 @@ public: /// Sets settings for periodic updates. 
void enablePeriodicUpdates(bool enable, const ExternalLoaderUpdateSettings & settings = {}); - /// Returns the names of all the objects in the configuration (loaded or not). - std::vector getNames() const; - size_t getNumberOfNames() const; - /// Returns the status of the object. /// If the object has not been loaded yet then the function returns Status::NOT_LOADED. /// If the specified name isn't found in the configuration then the function returns Status::NOT_EXIST. @@ -133,6 +129,9 @@ public: Loadables getCurrentlyLoadedObjects(const FilterByNameFunction & filter_by_name) const; size_t getNumberOfCurrentlyLoadedObjects() const; + /// Returns true if any object was loaded. + bool hasCurrentlyLoadedObjects() const; + static constexpr Duration NO_TIMEOUT = Duration::max(); /// Starts loading of a specified object. diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 973023cd4b2..7853e0c0841 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -1,9 +1,6 @@ #include -#include - #include -#include #include #include @@ -44,10 +41,10 @@ #include #include -#include - #include + #include +#include namespace DB @@ -281,19 +278,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres /// add column to postprocessing if there is a default_expression specified if (col_decl.default_expression) { - /** for columns with explicitly-specified type create two expressions: - * 1. default_expression aliased as column name with _tmp suffix - * 2. conversion of expression (1) to explicitly-specified type alias as column name */ + /** For columns with explicitly-specified type create two expressions: + * 1. default_expression aliased as column name with _tmp suffix + * 2. 
conversion of expression (1) to explicitly-specified type alias as column name + */ if (col_decl.type) { const auto & final_column_name = col_decl.name; const auto tmp_column_name = final_column_name + "_tmp"; const auto data_type_ptr = column_names_and_types.back().type.get(); - default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(tmp_column_name), - std::make_shared(data_type_ptr->getName())), final_column_name)); - default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); + + default_expr_list->children.emplace_back( + setAlias(addTypeConversionToAST(std::make_shared(tmp_column_name), data_type_ptr->getName()), + final_column_name)); + + default_expr_list->children.emplace_back( + setAlias( + col_decl.default_expression->clone(), + tmp_column_name)); } else default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), col_decl.name)); @@ -332,7 +335,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres column.type = name_type_it->type; if (!column.type->equals(*deduced_type)) - default_expr = makeASTFunction("CAST", default_expr, std::make_shared(column.type->getName())); + default_expr = addTypeConversionToAST(std::move(default_expr), column.type->getName()); } else column.type = defaults_sample_block.getByName(column.name).type; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e4391f52247..b906d151415 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -57,8 +57,6 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) { - - Block table_sample_non_materialized = table->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) @@ -66,6 +64,8 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const /// Format Native ignores header and write blocks as is. 
if (query.format == "Native") return {}; + else if (query.no_destination) + return table->getSampleBlockWithVirtuals(); else return table_sample_non_materialized; } @@ -108,14 +108,14 @@ BlockIO InterpreterInsertQuery::execute() if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote())) { out = std::make_shared( - out, table->getSampleBlock(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); + out, out->getHeader(), context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); } auto query_sample_block = getSampleBlock(query, table); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out = std::make_shared( - out, query_sample_block, table->getSampleBlock(), table->getColumns().getDefaults(), context); + out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index a4fe438af8f..def39d4d91c 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -87,10 +87,9 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as { std::unique_lock lock(mutex); + const auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) { - auto max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); - if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&]{ return processes.size() < max_size; })) throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); } @@ -117,20 +116,41 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as + ", maximum: " + settings.max_concurrent_queries_for_user.toString(), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); - auto range = user_process_list->second.queries.equal_range(client_info.current_query_id); - if (range.first != range.second) + auto running_query = user_process_list->second.queries.find(client_info.current_query_id); + + if (running_query != user_process_list->second.queries.end()) { if (!settings.replace_running_query) throw Exception("Query with id = " + client_info.current_query_id + " is already running.", ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); /// Ask queries to cancel. They will check this flag. 
- for (auto it = range.first; it != range.second; ++it) - it->second->is_killed.store(true, std::memory_order_relaxed); - } + running_query->second->is_killed.store(true, std::memory_order_relaxed); + + if (!max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&] + { + running_query = user_process_list->second.queries.find(client_info.current_query_id); + if (running_query == user_process_list->second.queries.end()) + return true; + running_query->second->is_killed.store(true, std::memory_order_relaxed); + return false; + })) + throw Exception("Query with id = " + client_info.current_query_id + " is already running and can't be stopped", + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } } } + /// Check other users running query with our query_id + for (const auto & user_process_list : user_to_queries) + { + if (user_process_list.first == client_info.current_user) + continue; + if (auto running_query = user_process_list.second.queries.find(client_info.current_query_id); running_query != user_process_list.second.queries.end()) + throw Exception("Query with id = " + client_info.current_query_id + " is already running by user " + user_process_list.first, + ErrorCodes::QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING); + } + auto process_it = processes.emplace(processes.end(), query_, client_info, settings.max_memory_usage, settings.memory_tracker_fault_probability, priorities.insert(settings.priority)); @@ -226,17 +246,12 @@ ProcessListEntry::~ProcessListEntry() bool found = false; - auto range = user_process_list.queries.equal_range(query_id); - if (range.first != range.second) + if (auto running_query = user_process_list.queries.find(query_id); running_query != user_process_list.queries.end()) { - for (auto jt = range.first; jt != range.second; ++jt) + if (running_query->second == process_list_element_ptr) { - if (jt->second == process_list_element_ptr) - { - user_process_list.queries.erase(jt); - found = true; - break; - } + user_process_list.queries.erase(running_query->first); + found = true; } } @@ -245,8 +260,7 @@ ProcessListEntry::~ProcessListEntry() LOG_ERROR(&Logger::get("ProcessList"), "Logical error: cannot find query by query_id and pointer to ProcessListElement in ProcessListForUser"); std::terminate(); } - - parent.have_space.notify_one(); + parent.have_space.notify_all(); /// If there are no more queries for the user, then we will reset memory tracker and network throttler. if (user_process_list.queries.empty()) diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index 32f59749450..b75a4e7a730 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -203,7 +203,7 @@ struct ProcessListForUser ProcessListForUser(); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. 
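Note: the rewritten replace_running_query path above no longer kills the old query and proceeds immediately: it sets is_killed, then waits on have_space for up to queue_max_wait_ms, re-checking (and re-killing) on every wakeup until the old entry leaves the map. The wait pattern in isolation, with invented names — in the real code the predicate runs under the already-held ProcessList mutex:

    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    /// Sketch: wait until the previous query with this id disappears, asking it
    /// to cancel on every wakeup; false means it never left within the timeout.
    bool waitForQueryToLeave(std::mutex & mutex, std::condition_variable & have_space,
                             std::unordered_map<std::string, bool /*is_killed*/> & queries,
                             const std::string & query_id, long max_wait_ms)
    {
        std::unique_lock lock(mutex);
        return max_wait_ms && have_space.wait_for(lock, std::chrono::milliseconds(max_wait_ms), [&]
        {
            auto it = queries.find(query_id);
            if (it == queries.end())
                return true;        /// the old query is gone; its id may be reused
            it->second = true;      /// keep requesting cancellation (is_killed)
            return false;
        });
    }

The companion change from notify_one to notify_all in ~ProcessListEntry ensures every such waiter gets to re-check its predicate when a query finishes.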
- using QueryToElement = std::unordered_multimap; + using QueryToElement = std::unordered_map; QueryToElement queries; ProfileEvents::Counters user_performance_counters{VariableContext::User, &ProfileEvents::global_counters}; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 1573202a946..c35c47179c6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ namespace ErrorCodes { extern const int TOO_DEEP_AST; extern const int CYCLIC_ALIASES; + extern const int UNKNOWN_QUERY_PARAMETER; } @@ -227,14 +229,16 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) data.current_alias = my_alias; } - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); - if (auto * node = ast->as()) - visit(*node, ast, data); + if (auto * node_func = ast->as()) + visit(*node_func, ast, data); + else if (auto * node_id = ast->as()) + visit(*node_id, ast, data); + else if (auto * node_tables = ast->as()) + visit(*node_tables, ast, data); + else if (auto * node_select = ast->as()) + visit(*node_select, ast, data); + else if (auto * node_param = ast->as()) + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. if (ast.get() != initial_ast.get()) diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp new file mode 100644 index 00000000000..325499d59d2 --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_QUERY_PARAMETER; + extern const int BAD_QUERY_PARAMETER; +} + + +void ReplaceQueryParameterVisitor::visit(ASTPtr & ast) +{ + for (auto & child : ast->children) + { + if (child->as()) + visitQueryParameter(child); + else + visit(child); + } +} + +const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) +{ + auto search = query_parameters.find(name); + if (search != query_parameters.end()) + return search->second; + else + throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); +} + +void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) +{ + const auto & ast_param = ast->as(); + const String & value = getParamValue(ast_param.name); + const String & type_name = ast_param.type; + + const auto data_type = DataTypeFactory::instance().get(type_name); + auto temp_column_ptr = data_type->createColumn(); + IColumn & temp_column = *temp_column_ptr; + ReadBufferFromString read_buffer{value}; + FormatSettings format_settings; + data_type->deserializeAsWholeText(temp_column, read_buffer, format_settings); + + if (!read_buffer.eof()) + throw Exception("Value " + value + " cannot be parsed as " + type_name + " for query parameter '" + ast_param.name + "'", ErrorCodes::BAD_QUERY_PARAMETER); + + ast = addTypeConversionToAST(std::make_shared(temp_column[0]), type_name); +} + +} diff --git a/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h 
b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h new file mode 100644 index 00000000000..1931d4c0ba8 --- /dev/null +++ b/dbms/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTQueryParameter; + +/// Visit substitutions in a query, replace ASTQueryParameter with ASTLiteral. +class ReplaceQueryParameterVisitor +{ +public: + ReplaceQueryParameterVisitor(const NameToNameMap & parameters) + : query_parameters(parameters) + {} + + void visit(ASTPtr & ast); + +private: + const NameToNameMap & query_parameters; + const String & getParamValue(const String & name); + void visitQueryParameter(ASTPtr & ast); +}; + +} diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 71a49b4c106..62982ea1e59 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -76,7 +76,9 @@ void collectSourceColumns(const ASTSelectQuery * select_query, StoragePtr storag if (select_query) { const auto & storage_aliases = storage->getColumns().getAliases(); + const auto & storage_virtuals = storage->getColumns().getVirtuals(); source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); + source_columns.insert(source_columns.end(), storage_virtuals.begin(), storage_virtuals.end()); } } } diff --git a/dbms/src/Interpreters/SystemLog.cpp b/dbms/src/Interpreters/SystemLog.cpp index 94214b26f6e..f46b348db7a 100644 --- a/dbms/src/Interpreters/SystemLog.cpp +++ b/dbms/src/Interpreters/SystemLog.cpp @@ -50,6 +50,12 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi SystemLogs::~SystemLogs() +{ + shutdown(); +} + + +void SystemLogs::shutdown() { if (query_log) query_log->shutdown(); diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 59dda00e71b..48dbde5a38b 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -67,6 +68,8 @@ struct SystemLogs SystemLogs(Context & global_context, const Poco::Util::AbstractConfiguration & config); ~SystemLogs(); + void shutdown(); + std::shared_ptr query_log; /// Used to log queries. std::shared_ptr query_thread_log; /// Used to log query threads. std::shared_ptr part_log; /// Used to log operations with parts @@ -101,22 +104,10 @@ public: /** Append a record into log. * Writing to table will be done asynchronously and in case of failure, record could be lost. */ - void add(const LogElement & element) - { - if (is_shutdown) - return; - - /// Without try we could block here in case of queue overflow. - if (!queue.tryPush({false, element})) - LOG_ERROR(log, "SystemLog queue is full"); - } + void add(const LogElement & element); /// Flush data in the buffer to disk - void flush() - { - if (!is_shutdown) - flushImpl(false); - } + void flush(); /// Stop the background flush thread before destructor. No more data will be written. void shutdown(); @@ -130,7 +121,15 @@ protected: const size_t flush_interval_milliseconds; std::atomic is_shutdown{false}; - using QueueItem = std::pair; /// First element is shutdown flag for thread. + enum class EntryType + { + LOG_ELEMENT = 0, + AUTO_FLUSH, + FORCE_FLUSH, + SHUTDOWN, + }; + + using QueueItem = std::pair; /// Queue is bounded. But its size is quite large to not block in all normal cases. 
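Note: ReplaceQueryParameterVisitor above completes the prepared-statements loop introduced in this patch: the parser turns each {name:Type} placeholder into an ASTQueryParameter, Context carries the name-to-value map, and the visitor deserializes the value as Type and splices in a typed literal via addTypeConversionToAST before executeQueryImpl re-serializes the AST. Roughly, for a parameter pid set to '42' (serialized form approximate):

    -- as sent by the client
    SELECT * FROM table WHERE id = {pid:UInt16}
    -- after substitution in executeQueryImpl
    SELECT * FROM table WHERE id = CAST(42, 'UInt16')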
ConcurrentBoundedQueue queue {DBMS_SYSTEM_LOG_QUEUE_SIZE}; @@ -140,7 +139,6 @@ protected: * than accumulation of large amount of log records (for example, for query log - processing of large amount of queries). */ std::vector data; - std::mutex data_mutex; Logger * log; @@ -157,7 +155,13 @@ protected: bool is_prepared = false; void prepareTable(); - void flushImpl(bool quiet); + std::mutex flush_mutex; + std::mutex condvar_mutex; + std::condition_variable flush_condvar; + bool force_flushing = false; + + /// flushImpl can be executed only in saving_thread. + void flushImpl(EntryType reason); }; @@ -178,6 +182,37 @@ SystemLog::SystemLog(Context & context_, } +template +void SystemLog::add(const LogElement & element) +{ + if (is_shutdown) + return; + + /// Without try we could block here in case of queue overflow. + if (!queue.tryPush({EntryType::LOG_ELEMENT, element})) + LOG_ERROR(log, "SystemLog queue is full"); +} + + +template +void SystemLog::flush() +{ + if (is_shutdown) + return; + + std::lock_guard flush_lock(flush_mutex); + force_flushing = true; + + /// Tell thread to execute extra flush. + queue.push({EntryType::FORCE_FLUSH, {}}); + + /// Wait for flush being finished. + std::unique_lock lock(condvar_mutex); + while (force_flushing) + flush_condvar.wait(lock); +} + + template void SystemLog::shutdown() { @@ -186,7 +221,7 @@ void SystemLog::shutdown() return; /// Tell thread to shutdown. - queue.push({true, {}}); + queue.push({EntryType::SHUTDOWN, {}}); saving_thread.join(); } @@ -219,16 +254,10 @@ void SystemLog::threadFunction() QueueItem element; bool has_element = false; - bool is_empty; - { - std::unique_lock lock(data_mutex); - is_empty = data.empty(); - } - /// data.size() is increased only in this function /// TODO: get rid of data and queue duality - if (is_empty) + if (data.empty()) { queue.pop(element); has_element = true; @@ -242,25 +271,27 @@ void SystemLog::threadFunction() if (has_element) { - if (element.first) + if (element.first == EntryType::SHUTDOWN) { - /// Shutdown. /// NOTE: MergeTree engine can write data even it is already in shutdown state. - flush(); + flushImpl(element.first); break; } - else + else if (element.first == EntryType::FORCE_FLUSH) { - std::unique_lock lock(data_mutex); - data.push_back(element.second); + flushImpl(element.first); + time_after_last_write.restart(); + continue; } + else + data.push_back(element.second); } size_t milliseconds_elapsed = time_after_last_write.elapsed() / 1000000; if (milliseconds_elapsed >= flush_interval_milliseconds) { /// Write data to a table. - flushImpl(true); + flushImpl(EntryType::AUTO_FLUSH); time_after_last_write.restart(); } } @@ -275,13 +306,11 @@ void SystemLog::threadFunction() template -void SystemLog::flushImpl(bool quiet) +void SystemLog::flushImpl(EntryType reason) { - std::unique_lock lock(data_mutex); - try { - if (quiet && data.empty()) + if ((reason == EntryType::AUTO_FLUSH || reason == EntryType::SHUTDOWN) && data.empty()) return; LOG_TRACE(log, "Flushing system log"); @@ -320,6 +349,12 @@ void SystemLog::flushImpl(bool quiet) /// In case of exception, also clean accumulated data - to avoid locking. 
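Note: the new SystemLog::flush() above is a synchronous handshake with the saving thread: the caller marks force_flushing, enqueues a FORCE_FLUSH entry, and sleeps on flush_condvar; the tail of flushImpl (just below) clears the flag and notifies once the write completes. Stripped to its essentials, with the queue push elided:

    #include <condition_variable>
    #include <mutex>

    std::mutex flush_mutex;              /// serializes concurrent flush() callers
    std::mutex condvar_mutex;
    std::condition_variable flush_condvar;
    bool force_flushing = false;

    /// Caller side, as in SystemLog::flush().
    void requestFlushAndWait()
    {
        std::lock_guard flush_lock(flush_mutex);
        force_flushing = true;
        /// ... queue.push({EntryType::FORCE_FLUSH, {}}) goes here ...
        std::unique_lock lock(condvar_mutex);
        flush_condvar.wait(lock, [] { return !force_flushing; });
    }

    /// Saving-thread side, as at the end of flushImpl(EntryType::FORCE_FLUSH).
    void acknowledgeFlush()
    {
        std::lock_guard lock(condvar_mutex);
        force_flushing = false;
        flush_condvar.notify_one();
    }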
data.clear(); } + if (reason == EntryType::FORCE_FLUSH) + { + std::lock_guard lock(condvar_mutex); + force_flushing = false; + flush_condvar.notify_one(); + } } diff --git a/dbms/src/Interpreters/addTypeConversionToAST.cpp b/dbms/src/Interpreters/addTypeConversionToAST.cpp new file mode 100644 index 00000000000..699c3bd27c3 --- /dev/null +++ b/dbms/src/Interpreters/addTypeConversionToAST.cpp @@ -0,0 +1,26 @@ +#include "addTypeConversionToAST.h" + +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name) +{ + auto func = makeASTFunction("CAST", ast, std::make_shared(type_name)); + + if (ASTWithAlias * ast_with_alias = dynamic_cast(ast.get())) + { + func->alias = ast_with_alias->alias; + func->prefer_alias_to_column_name = ast_with_alias->prefer_alias_to_column_name; + ast_with_alias->alias.clear(); + } + + return func; +} + +} diff --git a/dbms/src/Interpreters/addTypeConversionToAST.h b/dbms/src/Interpreters/addTypeConversionToAST.h new file mode 100644 index 00000000000..56c3a636f45 --- /dev/null +++ b/dbms/src/Interpreters/addTypeConversionToAST.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// It will produce an expression with CAST to get an AST with the required type. +ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name); + +} diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 2e6062f4c19..1dfb7def86b 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "DNSCacheUpdater.h" @@ -180,7 +181,9 @@ static std::tuple executeQueryImpl( insert_query->has_tail = has_query_tail; } else + { query_end = end; + } } catch (...) { @@ -200,6 +203,17 @@ static std::tuple executeQueryImpl( try { + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. + if (context.hasQueryParameters()) + { + ReplaceQueryParameterVisitor visitor(context.getQueryParameters()); + visitor.visit(ast); + } + + /// Get new query after substitutions. + if (context.hasQueryParameters()) + query = serializeAST(*ast); + logQuery(query.substr(0, settings.log_queries_cut_to_length), context, internal); /// Check the limits. diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index e614f64d208..c7cd100b415 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -82,6 +82,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); col_decl->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::COMMENT_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : ""); + column->formatImpl(settings, state, frame); + settings.ostr << " " << (settings.hilite ? hilite_none : ""); + comment->formatImpl(settings, state, frame); + } else if (type == ASTAlterCommand::MODIFY_ORDER_BY) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : ""); @@ -172,13 +179,6 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? 
hilite_none : ""); predicate->formatImpl(settings, state, frame); } - else if (type == ASTAlterCommand::COMMENT_COLUMN) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "COMMENT COLUMN " << (settings.hilite ? hilite_none : ""); - column->formatImpl(settings, state, frame); - settings.ostr << " " << (settings.hilite ? hilite_none : ""); - comment->formatImpl(settings, state, frame); - } else if (type == ASTAlterCommand::MODIFY_TTL) { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : ""); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index 892be19c6b5..e718d5c292d 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -21,18 +21,18 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->default_expression); } - if (codec) - { - res->codec = codec->clone(); - res->children.push_back(res->codec); - } - if (comment) { res->comment = comment->clone(); res->children.push_back(res->comment); } + if (codec) + { + res->codec = codec->clone(); + res->children.push_back(res->codec); + } + if (ttl) { res->ttl = ttl->clone(); diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 311ceb4efbc..ad23e0669bc 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -15,8 +15,8 @@ public: ASTPtr type; String default_specifier; ASTPtr default_expression; - ASTPtr codec; ASTPtr comment; + ASTPtr codec; ASTPtr ttl; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index 5ebd02b85f8..d921ff427ae 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -26,9 +26,10 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << toString(kind) << (settings.hilite ? hilite_none : "") << " "; + children.at(0)->formatImpl(settings, state, frame); } private: @@ -38,8 +39,8 @@ private: { switch (kind) { - case ParsedAST: return "ParsedAST"; - case AnalyzedSyntax: return "AnalyzedSyntax"; + case ParsedAST: return "AST"; + case AnalyzedSyntax: return "ANALYZE"; } __builtin_unreachable(); diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 5c5dbc9ba90..b550c7062d1 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -126,6 +126,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (0 == strcmp(name.c_str(), func[0])) { + if (frame.need_parens) + settings.ostr << '('; + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); /** A particularly stupid case. 
If we have a unary minus before a literal that is a negative number @@ -138,6 +141,9 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format arguments->formatImpl(settings, state, nested_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } @@ -209,11 +215,17 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "arrayElement")) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } if (!written && 0 == strcmp(name.c_str(), "tupleElement")) @@ -223,10 +235,16 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { if (lit->value.getType() == Field::Types::UInt64) { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); + arguments->children[1]->formatImpl(settings, state, nested_dont_need_parens); written = true; + + if (frame.need_parens) + settings.ostr << ')'; } } } diff --git a/dbms/src/Parsers/ASTQueryParameter.cpp b/dbms/src/Parsers/ASTQueryParameter.cpp new file mode 100644 index 00000000000..462a08b0447 --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.cpp @@ -0,0 +1,24 @@ +#include +#include + + +namespace DB +{ + +void ASTQueryParameter::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr + << (settings.hilite ? hilite_substitution : "") << '{' + << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(name) + << (settings.hilite ? hilite_substitution : "") << ':' + << (settings.hilite ? hilite_identifier : "") << type + << (settings.hilite ? hilite_substitution : "") << '}' + << (settings.hilite ? hilite_none : ""); +} + +void ASTQueryParameter::appendColumnNameImpl(WriteBuffer & ostr) const +{ + writeString(name, ostr); +} + +} diff --git a/dbms/src/Parsers/ASTQueryParameter.h b/dbms/src/Parsers/ASTQueryParameter.h new file mode 100644 index 00000000000..858b23a0250 --- /dev/null +++ b/dbms/src/Parsers/ASTQueryParameter.h @@ -0,0 +1,29 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Parameter in query with name and type of substitution ({name:type}). +/// Example: SELECT * FROM table WHERE id = {pid:UInt16}. +class ASTQueryParameter : public ASTWithAlias +{ +public: + String name; + String type; + + ASTQueryParameter(const String & name_, const String & type_) : name(name_), type(type_) {} + + /** Get the text that identifies this element. 
*/ + String getID(char delim) const override { return String("QueryParameter") + delim + name + ':' + type; } + + ASTPtr clone() const override { return std::make_shared(*this); } + +protected: + void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + void appendColumnNameImpl(WriteBuffer & ostr) const override; +}; + +} diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp new file mode 100644 index 00000000000..1e16fb6f0ee --- /dev/null +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.cpp @@ -0,0 +1,14 @@ +#include + + +namespace DB +{ + +void ASTQueryWithTableAndOutput::formatHelper(const FormatSettings & settings, const char * name) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : ""); + settings.ostr << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); +} + +} + diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index 3f3fd036d78..594876ace7b 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -9,7 +9,7 @@ namespace DB /** Query specifying table name and, possibly, the database and the FORMAT section. - */ + */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput { public: @@ -18,11 +18,7 @@ public: bool temporary{false}; protected: - void formatHelper(const FormatSettings & settings, const char * name) const - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : "") - << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); - } + void formatHelper(const FormatSettings & settings, const char * name) const; }; @@ -43,7 +39,7 @@ public: protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { - formatHelper(settings, AstIDAndQueryNames::Query); + formatHelper(settings, temporary ? AstIDAndQueryNames::QueryTemporary : AstIDAndQueryNames::Query); } }; diff --git a/dbms/src/Parsers/ASTShowTablesQuery.cpp b/dbms/src/Parsers/ASTShowTablesQuery.cpp new file mode 100644 index 00000000000..dd7b0d013ad --- /dev/null +++ b/dbms/src/Parsers/ASTShowTablesQuery.cpp @@ -0,0 +1,37 @@ +#include +#include + + +namespace DB +{ + +ASTPtr ASTShowTablesQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + cloneOutputOptions(*res); + return res; +} + +void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << "TABLES" << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? 
hilite_none : "") + << std::quoted(like, '\''); + } +} + +} + diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 58915df0e60..9b994b6e31f 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -23,34 +23,10 @@ public: /** Get the text that identifies this element. */ String getID(char) const override { return "ShowTables"; } - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - res->children.clear(); - cloneOutputOptions(*res); - return res; - } + ASTPtr clone() const override; protected: - void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override - { - if (databases) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); - } - else - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW TABLES" << (settings.hilite ? hilite_none : ""); - - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); - - if (!like.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") - << std::quoted(like, '\''); - } - } + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; } diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 98cf6254a4f..47be2008284 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -81,6 +81,7 @@ ASTPtr ASTTablesInSelectQuery::clone() const void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + frame.current_select = this; std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); if (database_and_table_name) @@ -183,14 +184,14 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS if (using_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USING " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : ""); settings.ostr << "("; using_expression_list->formatImpl(settings, state, frame); settings.ostr << ")"; } else if (on_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "ON " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? 
hilite_none : ""); on_expression->formatImpl(settings, state, frame); } } @@ -226,7 +227,6 @@ void ASTTablesInSelectQueryElement::formatImpl(const FormatSettings & settings, } table_expression->formatImpl(settings, state, frame); - settings.ostr << " "; if (table_join) table_join->as().formatImplAfterTable(settings, state, frame); diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index 67a4401f9a5..0239d0b34cd 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -16,27 +16,27 @@ void ASTWithAlias::writeAlias(const String & name, const FormatSettings & settin void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!alias.empty()) + /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. + /// This is needed because the query can become extraordinary large after substitution of aliases. + if (!alias.empty() && !state.printed_asts_with_alias.emplace(frame.current_select, alias, getTreeHash()).second) { - /// If we have previously output this node elsewhere in the query, now it is enough to output only the alias. - if (!state.printed_asts_with_alias.emplace(frame.current_select, alias).second) - { - settings.writeIdentifier(alias); - return; - } + settings.writeIdentifier(alias); } - - /// If there is an alias, then parentheses are required around the entire expression, including the alias. Because a record of the form `0 AS x + 0` is syntactically invalid. - if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; - - formatImplWithoutAlias(settings, state, frame); - - if (!alias.empty()) + else { - writeAlias(alias, settings); - if (frame.need_parens) - settings.ostr <<')'; + /// If there is an alias, then parentheses are required around the entire expression, including the alias. + /// Because a record of the form `0 AS x + 0` is syntactically invalid. 
+ if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + formatImplWithoutAlias(settings, state, frame); + + if (!alias.empty()) + { + writeAlias(alias, settings); + if (frame.need_parens) + settings.ostr << ')'; + } } } diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 2741aa0d491..9c0071c64e8 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -1199,6 +1200,52 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } +bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningCurlyBrace) + return false; + + ++pos; + + if (pos->type != TokenType::BareWord) + { + expected.add(pos, "substitution name (identifier)"); + return false; + } + + String name(pos->begin, pos->end); + ++pos; + + if (pos->type != TokenType::Colon) + { + expected.add(pos, "colon between name and type"); + return false; + } + + ++pos; + + auto old_pos = pos; + ParserIdentifierWithOptionalParameters type_parser; + if (!type_parser.ignore(pos, expected)) + { + expected.add(pos, "substitution type"); + return false; + } + + String type(old_pos->begin, pos->begin); + + if (pos->type != TokenType::ClosingCurlyBrace) + { + expected.add(pos, "closing curly brace"); + return false; + } + + ++pos; + node = std::make_shared(name, type); + return true; +} + + bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserSubquery().parse(pos, node, expected) @@ -1218,7 +1265,8 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) || ParserAsterisk().parse(pos, node, expected) - || ParserCompoundIdentifier().parse(pos, node, expected); + || ParserCompoundIdentifier().parse(pos, node, expected) + || ParserSubstitution().parse(pos, node, expected); } diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index e3dc5ae44d0..b4fe77e8bb3 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -242,6 +242,17 @@ private: }; +/** Prepared statements. + * Parse query with parameter expression {name:type}. + */ +class ParserSubstitution : public IParserBase +{ +protected: + const char * getName() const { return "substitution"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); +}; + + /** The expression element is one of: an expression in parentheses, an array, a literal, a function, an identifier, an asterisk. 
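 * With this patch the element may also be a query-parameter substitution: for example,
 * `{pid:UInt16}` is consumed by ParserSubstitution and becomes ASTQueryParameter(name = "pid", type = "UInt16").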
*/ class ParserExpressionElement : public IParserBase diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index c54e8973c60..de19e3639db 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -17,12 +17,13 @@ namespace ErrorCodes } -const char * IAST::hilite_keyword = "\033[1m"; -const char * IAST::hilite_identifier = "\033[0;36m"; -const char * IAST::hilite_function = "\033[0;33m"; -const char * IAST::hilite_operator = "\033[1;33m"; -const char * IAST::hilite_alias = "\033[0;32m"; -const char * IAST::hilite_none = "\033[0m"; +const char * IAST::hilite_keyword = "\033[1m"; +const char * IAST::hilite_identifier = "\033[0;36m"; +const char * IAST::hilite_function = "\033[0;33m"; +const char * IAST::hilite_operator = "\033[1;33m"; +const char * IAST::hilite_alias = "\033[0;32m"; +const char * IAST::hilite_substitution = "\033[1;36m"; +const char * IAST::hilite_none = "\033[0m"; String backQuoteIfNeed(const String & x) diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 89ab8fb05c3..a2aa9f2b23e 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -161,6 +161,13 @@ public: nl_or_ws = one_line ? ' ' : '\n'; } + FormatSettings(std::ostream & ostr_, const FormatSettings & other) + : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), + always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style) + { + nl_or_ws = one_line ? ' ' : '\n'; + } + void writeIdentifier(const String & name) const; }; @@ -170,7 +177,10 @@ public: /** The SELECT query in which the alias was found; identifier of a node with such an alias. * It is necessary that when the node has met again, output only the alias. */ - std::set> printed_asts_with_alias; + std::set> printed_asts_with_alias; }; /// The state that is copied when each node is formatted. For example, nesting level. @@ -201,6 +211,7 @@ public: static const char * hilite_function; static const char * hilite_operator; static const char * hilite_alias; + static const char * hilite_substitution; static const char * hilite_none; private: diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 0494eacd490..fe56dfadd5b 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -173,7 +173,10 @@ Token Lexer::nextTokenImpl() return Token(TokenType::OpeningSquareBracket, token_begin, ++pos); case ']': return Token(TokenType::ClosingSquareBracket, token_begin, ++pos); - + case '{': + return Token(TokenType::OpeningCurlyBrace, token_begin, ++pos); + case '}': + return Token(TokenType::ClosingCurlyBrace, token_begin, ++pos); case ',': return Token(TokenType::Comma, token_begin, ++pos); case ';': diff --git a/dbms/src/Parsers/Lexer.h b/dbms/src/Parsers/Lexer.h index 13cd00e3dd3..3f2712bae08 100644 --- a/dbms/src/Parsers/Lexer.h +++ b/dbms/src/Parsers/Lexer.h @@ -23,6 +23,9 @@ namespace DB M(OpeningSquareBracket) \ M(ClosingSquareBracket) \ \ + M(OpeningCurlyBrace) \ + M(ClosingCurlyBrace) \ + \ M(Comma) \ M(Semicolon) \ M(Dot) /** Compound identifiers, like a.b or tuple access operator a.1, (x, y).2. 
*/ \ diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index e68a3b46e4a..f2fa7c506a6 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -10,24 +10,28 @@ struct ASTExistsQueryIDAndQueryNames { static constexpr auto ID = "ExistsQuery"; static constexpr auto Query = "EXISTS TABLE"; + static constexpr auto QueryTemporary = "EXISTS TEMPORARY TABLE"; }; struct ASTShowCreateTableQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateTableQuery"; static constexpr auto Query = "SHOW CREATE TABLE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY TABLE"; }; struct ASTShowCreateDatabaseQueryIDAndQueryNames { static constexpr auto ID = "ShowCreateDatabaseQuery"; static constexpr auto Query = "SHOW CREATE DATABASE"; + static constexpr auto QueryTemporary = "SHOW CREATE TEMPORARY DATABASE"; }; struct ASTDescribeQueryExistsQueryIDAndQueryNames { static constexpr auto ID = "DescribeQuery"; static constexpr auto Query = "DESCRIBE TABLE"; + static constexpr auto QueryTemporary = "DESCRIBE TEMPORARY TABLE"; }; using ASTExistsQuery = ASTQueryWithTableAndOutputImpl<ASTExistsQueryIDAndQueryNames>; diff --git a/dbms/src/Parsers/tests/lexer.cpp b/dbms/src/Parsers/tests/lexer.cpp index dca93b469bd..d9135b08c28 100644 --- a/dbms/src/Parsers/tests/lexer.cpp +++ b/dbms/src/Parsers/tests/lexer.cpp @@ -28,6 +28,8 @@ std::map<TokenType, const char *> hilite = {TokenType::ClosingRoundBracket, "\033[1;33m"}, {TokenType::OpeningSquareBracket, "\033[1;33m"}, {TokenType::ClosingSquareBracket, "\033[1;33m"}, + {TokenType::OpeningCurlyBrace, "\033[1;33m"}, + {TokenType::ClosingCurlyBrace, "\033[1;33m"}, {TokenType::Comma, "\033[1;33m"}, {TokenType::Semicolon, "\033[1;33m"}, diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 15f962f808d..7814f1a6ba0 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -182,7 +182,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri { if (type == ADD_COLUMN) { - ColumnDescription column(column_name, data_type); + ColumnDescription column(column_name, data_type, false); if (default_expression) { column.default_desc.kind = default_kind; @@ -388,8 +388,8 @@ void AlterCommands::validate(const IStorage & table, const Context & context) column_to_command_idx[column_name] = i; /// we're creating dummy DataTypeUInt8 in order to prevent the NullPointerException in ExpressionActions - columns.add(ColumnDescription( - column_name, command.data_type ? command.data_type : std::make_shared<DataTypeUInt8>())); + columns.add( + ColumnDescription(column_name, command.data_type ? command.data_type : std::make_shared<DataTypeUInt8>(), false)); if (command.default_expression) { diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 55eaf1b5022..2dbe308ea57 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -32,6 +32,11 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; } +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) + : name(std::move(name_)), type(std::move(type_)), is_virtual(is_virtual_) +{ +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ?
codec_ptr->getCodecDesc() : String(); }; @@ -115,10 +120,10 @@ void ColumnDescription::readText(ReadBuffer & buf) } -ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary) +ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, bool all_virtuals) { for (auto & elem : ordinary) - add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + add(ColumnDescription(std::move(elem.name), std::move(elem.type), all_virtuals)); } @@ -227,7 +232,7 @@ NamesAndTypesList ColumnsDescription::getOrdinary() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind == ColumnDefaultKind::Default) + if (col.default_desc.kind == ColumnDefaultKind::Default && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -250,6 +255,15 @@ NamesAndTypesList ColumnsDescription::getAliases() const return ret; } +NamesAndTypesList ColumnsDescription::getVirtuals() const +{ + NamesAndTypesList result; + for (const auto & column : columns) + if (column.is_virtual) + result.emplace_back(column.name, column.type); + return result; +} + NamesAndTypesList ColumnsDescription::getAll() const { NamesAndTypesList ret; @@ -285,7 +299,7 @@ NamesAndTypesList ColumnsDescription::getAllPhysical() const { NamesAndTypesList ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name, col.type); return ret; } @@ -294,7 +308,7 @@ Names ColumnsDescription::getNamesOfPhysical() const { Names ret; for (const auto & col : columns) - if (col.default_desc.kind != ColumnDefaultKind::Alias) + if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) ret.emplace_back(col.name); return ret; } @@ -302,7 +316,7 @@ Names ColumnsDescription::getNamesOfPhysical() const NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const { auto it = columns.get<1>().find(column_name); - if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias) + if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias || it->is_virtual) throw Exception("There is no physical column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); return NameAndTypePair(it->name, it->type); } @@ -310,7 +324,7 @@ NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) cons bool ColumnsDescription::hasPhysical(const String & column_name) const { auto it = columns.get<1>().find(column_name); - return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias; + return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias && !it->is_virtual; } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 7ec8ed2c44f..d0d042498fa 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -32,9 +32,10 @@ struct ColumnDescription String comment; CompressionCodecPtr codec; ASTPtr ttl; + bool is_virtual = false; ColumnDescription() = default; - ColumnDescription(String name_, DataTypePtr type_) : name(std::move(name_)), type(std::move(type_)) {} + ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } @@ -49,7 +50,7 @@ class ColumnsDescription { public: ColumnsDescription() = 
default; - explicit ColumnsDescription(NamesAndTypesList ordinary_); + explicit ColumnsDescription(NamesAndTypesList ordinary_, bool all_virtuals = false); /// `after_column` can be a Nested column name; void add(ColumnDescription column, const String & after_column = String()); @@ -67,8 +68,9 @@ public: NamesAndTypesList getOrdinary() const; NamesAndTypesList getMaterialized() const; NamesAndTypesList getAliases() const; - /// ordinary + materialized + aliases. - NamesAndTypesList getAll() const; + NamesAndTypesList getVirtuals() const; + NamesAndTypesList getAllPhysical() const; /// ordinary + materialized. + NamesAndTypesList getAll() const; /// ordinary + materialized + aliases + virtuals. using ColumnTTLs = std::unordered_map; ColumnTTLs getColumnTTLs() const; @@ -87,8 +89,6 @@ public: throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); } - /// ordinary + materialized. - NamesAndTypesList getAllPhysical() const; Names getNamesOfPhysical() const; bool hasPhysical(const String & column_name) const; NameAndTypePair getPhysical(const String & column_name) const; diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index 3d0ac164e26..687ca970311 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -25,28 +25,21 @@ IStorage::IStorage(ColumnsDescription columns_) setColumns(std::move(columns_)); } +IStorage::IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_) : virtuals(std::move(virtuals_)) +{ + setColumns(std::move(columns_)); +} + const ColumnsDescription & IStorage::getColumns() const { return columns; } -void IStorage::setColumns(ColumnsDescription columns_) -{ - if (columns_.getOrdinary().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - columns = std::move(columns_); -} - const IndicesDescription & IStorage::getIndices() const { return indices; } -void IStorage::setIndices(IndicesDescription indices_) -{ - indices = std::move(indices_); -} - NameAndTypePair IStorage::getColumn(const String & column_name) const { /// By default, we assume that there are no virtual columns in the storage. 
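To make the new virtual-columns constructor concrete, here is a minimal sketch (illustrative only; the class name, column name, and headers are assumptions, not part of this patch) of a storage declaring a virtual column the same way StorageKafka does later in this diff:

// Assumes <Storages/IStorage.h>, <DataTypes/DataTypesNumber.h> and <Core/Types.h> are available.
class StorageWithRowNumber : public IStorage
{
public:
    explicit StorageWithRowNumber(ColumnsDescription columns_)
        : IStorage(
            std::move(columns_),
            /// Second argument lists virtual columns; setColumns() skips any name already taken by a real column.
            ColumnsDescription({{"_row_number", std::make_shared<DataTypeUInt64>()}}, /* all_virtuals = */ true))
    {
    }

    std::string getName() const override { return "StorageWithRowNumber"; }
    std::string getTableName() const override { return "example"; }
};

A SELECT over such a table could then reference _row_number like any other column.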
@@ -69,6 +62,16 @@ Block IStorage::getSampleBlock() const return res; } +Block IStorage::getSampleBlockWithVirtuals() const +{ + auto res = getSampleBlock(); + + for (const auto & column : getColumns().getVirtuals()) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} + Block IStorage::getSampleBlockNonMaterialized() const { Block res; @@ -266,6 +269,29 @@ void IStorage::check(const Block & block, bool need_all) const } } +void IStorage::setColumns(ColumnsDescription columns_) +{ + if (columns_.getOrdinary().empty()) + throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + columns = std::move(columns_); + + for (const auto & column : virtuals) + { + if (!columns.has(column.name)) + columns.add(column); + } +} + +void IStorage::setIndices(IndicesDescription indices_) +{ + indices = std::move(indices_); +} + +bool IStorage::isVirtualColumn(const String & column_name) const +{ + return getColumns().get(column_name).is_virtual; +} + TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id) { TableStructureReadLockHolder result; diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index f18592ebce5..5bfd8224372 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -50,6 +50,7 @@ class IStorage : public std::enable_shared_from_this public: IStorage() = default; explicit IStorage(ColumnsDescription columns_); + IStorage(ColumnsDescription columns_, ColumnsDescription virtuals_); virtual ~IStorage() = default; IStorage(const IStorage &) = delete; @@ -82,20 +83,18 @@ public: public: /// thread-unsafe part. lockStructure must be acquired - const ColumnsDescription & getColumns() const; - void setColumns(ColumnsDescription columns_); - + const ColumnsDescription & getColumns() const; /// returns combined set of columns const IndicesDescription & getIndices() const; - void setIndices(IndicesDescription indices_); /// NOTE: these methods should include virtual columns, /// but should NOT include ALIAS columns (they are treated separately). virtual NameAndTypePair getColumn(const String & column_name) const; virtual bool hasColumn(const String & column_name) const; - Block getSampleBlock() const; - Block getSampleBlockNonMaterialized() const; - Block getSampleBlockForColumns(const Names & column_names) const; /// including virtual and alias columns. + Block getSampleBlock() const; /// ordinary + materialized. + Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. + Block getSampleBlockNonMaterialized() const; /// ordinary. + Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. @@ -112,8 +111,17 @@ public: /// thread-unsafe part. lockStructure must be acquired /// If |need_all| is set, then checks that all the columns of the table are in the block. void check(const Block & block, bool need_all = false) const; +protected: /// still thread-unsafe part. + void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. + void setIndices(IndicesDescription indices_); + + /// Returns whether the column is virtual - by default all columns are real. + /// Initially reserved virtual column name may be shadowed by real column. 
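+ /// For example (illustrative): if a user creates a Kafka table with its own physical column
+ /// named "_topic", setColumns() keeps the real column and the virtual one is not added,
+ /// so isVirtualColumn("_topic") returns false for that table.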
+ virtual bool isVirtualColumn(const String & column_name) const; + private: - ColumnsDescription columns; + ColumnsDescription columns; /// combined real and virtual columns + const ColumnsDescription virtuals = {}; IndicesDescription indices; public: @@ -322,12 +330,6 @@ public: /// Returns additional columns that need to be read for FINAL to work. virtual Names getColumnsRequiredForFinal() const { return {}; } -protected: - /// Returns whether the column is virtual - by default all columns are real. - /// Initially reserved virtual column name may be shadowed by real column. - /// Returns false even for non-existent non-virtual columns. - virtual bool isVirtualColumn(const String & /* column_name */) const { return false; } - private: /// You always need to take the next three locks in this order. diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 62a86d44944..5b8d80cb062 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include @@ -7,15 +9,17 @@ namespace DB { KafkaBlockInputStream::KafkaBlockInputStream( - StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_) - : storage(storage_), context(context_), max_block_size(max_block_size_) + StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_) + : storage(storage_), context(context_), column_names(columns), max_block_size(max_block_size_) { context.setSetting("input_format_skip_unknown_fields", 1u); // Always skip unknown fields regardless of the context (JSON or TSKV) context.setSetting("input_format_allow_errors_ratio", 0.); - context.setSetting("input_format_allow_errors_num", storage.skip_broken); + context.setSetting("input_format_allow_errors_num", storage.skipBroken()); - if (!schema.empty()) - context.setSetting("format_schema", schema); + if (!storage.getSchemaName().empty()) + context.setSetting("format_schema", storage.getSchemaName()); + + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); } KafkaBlockInputStream::~KafkaBlockInputStream() @@ -29,6 +33,11 @@ KafkaBlockInputStream::~KafkaBlockInputStream() storage.pushBuffer(buffer); } +Block KafkaBlockInputStream::getHeader() const +{ + return storage.getSampleBlockForColumns(column_names); +} + void KafkaBlockInputStream::readPrefixImpl() { buffer = storage.tryClaimBuffer(context.getSettingsRef().queue_max_wait_ms.totalMilliseconds()); @@ -37,20 +46,49 @@ void KafkaBlockInputStream::readPrefixImpl() if (!buffer) buffer = storage.createBuffer(); - buffer->subBufferAs()->subscribe(storage.topics); + buffer->subBufferAs()->subscribe(storage.getTopics()); const auto & limits = getLimits(); const size_t poll_timeout = buffer->subBufferAs()->pollTimeout(); size_t rows_portion_size = poll_timeout ? 
std::min(max_block_size, limits.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; rows_portion_size = std::max(rows_portion_size, 1ul); - auto child = FormatFactory::instance().getInput(storage.format_name, *buffer, storage.getSampleBlock(), context, max_block_size, rows_portion_size); + auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support + auto read_callback = [this] + { + const auto * sub_buffer = buffer->subBufferAs(); + virtual_columns[0]->insert(sub_buffer->currentTopic()); // "topic" + virtual_columns[1]->insert(sub_buffer->currentKey()); // "key" + virtual_columns[2]->insert(sub_buffer->currentOffset()); // "offset" + }; + + auto child = FormatFactory::instance().getInput( + storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size, rows_portion_size, read_callback); child->setLimits(limits); addChild(child); broken = true; } +Block KafkaBlockInputStream::readImpl() +{ + Block block = children.back()->read(); + if (!block) + return block; + + Block virtual_block = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneWithColumns(std::move(virtual_columns)); + virtual_columns = storage.getSampleBlockForColumns({"_topic", "_key", "_offset"}).cloneEmptyColumns(); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + block.insert(column); + + /// FIXME: materialize MATERIALIZED columns here. + + return ConvertingBlockInputStream( + context, std::make_shared(block), getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Name) + .read(); +} + void KafkaBlockInputStream::readSuffixImpl() { buffer->subBufferAs()->commit(); diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h index f1059ca218b..a7e82034eb9 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.h @@ -11,22 +11,24 @@ namespace DB class KafkaBlockInputStream : public IBlockInputStream { public: - KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const String & schema, size_t max_block_size_); + KafkaBlockInputStream(StorageKafka & storage_, const Context & context_, const Names & columns, size_t max_block_size_); ~KafkaBlockInputStream() override; String getName() const override { return storage.getName(); } - Block readImpl() override { return children.back()->read(); } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override; void readPrefixImpl() override; + Block readImpl() override; void readSuffixImpl() override; private: StorageKafka & storage; Context context; + Names column_names; UInt64 max_block_size; BufferPtr buffer; + MutableColumns virtual_columns; bool broken = true, claimed = false; }; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 9675b29d659..dba01b3fbe4 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -3,6 +3,7 @@ namespace DB { +using namespace std::chrono_literals; ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( ConsumerPtr consumer_, Poco::Logger * log_, size_t max_batch_size, size_t poll_timeout_, bool intermediate_commit_) : ReadBuffer(nullptr, 0) @@ -17,7 +18,10 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() { + /// NOTE: see 
https://github.com/edenhill/librdkafka/issues/2077 consumer->unsubscribe(); + consumer->unassign(); + while (consumer->get_consumer_queue().next_event(1s)); } void ReadBufferFromKafkaConsumer::commit() @@ -53,8 +57,6 @@ void ReadBufferFromKafkaConsumer::subscribe(const Names & topics) // If we're doing a manual select then it's better to get something after a wait, then immediate nothing. if (consumer->get_subscription().empty()) { - using namespace std::chrono_literals; - consumer->pause(); // don't accidentally read any messages consumer->subscribe(topics); consumer->poll(5s); @@ -73,7 +75,7 @@ void ReadBufferFromKafkaConsumer::unsubscribe() consumer->unsubscribe(); } -/// Try to commit messages implicitly after we processed the previous batch. +/// Do commit messages implicitly after we processed the previous batch. bool ReadBufferFromKafkaConsumer::nextImpl() { /// NOTE: ReadBuffer was implemented with an immutable underlying contents in mind. @@ -86,18 +88,21 @@ bool ReadBufferFromKafkaConsumer::nextImpl() { if (intermediate_commit) commit(); - messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + + /// Don't drop old messages immediately, since we may need them for virtual columns. + auto new_messages = consumer->poll_batch(batch_size, std::chrono::milliseconds(poll_timeout)); + if (new_messages.empty()) + { + LOG_TRACE(log, "Stalled"); + stalled = true; + return false; + } + messages = std::move(new_messages); current = messages.begin(); LOG_TRACE(log, "Polled batch of " << messages.size() << " messages"); } - if (messages.empty()) - { - stalled = true; - return false; - } - if (auto err = current->get_error()) { ++current; diff --git a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 2a473151d29..ac6011cfed0 100644 --- a/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/dbms/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -25,6 +26,11 @@ public: auto pollTimeout() { return poll_timeout; } + // Return values for the message that's being read. 
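+ // Note (illustrative): nextImpl() advances `current` right after it installs a message's payload
+ // into the working buffer, so current[-1] is the message whose payload is currently being parsed.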
+ String currentTopic() const { return current[-1].get_topic(); } + String currentKey() const { return current[-1].get_key(); } + auto currentOffset() const { return current[-1].get_offset(); } + private: using Messages = std::vector<cppkafka::Message>; diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 81ed311eb6d..20599c7e4f8 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -4,6 +4,8 @@ #include #include #include +#include <DataTypes/DataTypeString.h> +#include <DataTypes/DataTypesNumber.h> #include #include #include @@ -69,21 +71,36 @@ StorageKafka::StorageKafka( const std::string & database_name_, Context & context_, const ColumnsDescription & columns_, - const String & brokers_, const String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken_, + const String & brokers_, + const String & group_, + const Names & topics_, + const String & format_name_, + char row_delimiter_, + const String & schema_name_, + size_t num_consumers_, + UInt64 max_block_size_, + size_t skip_broken_, bool intermediate_commit_) - : IStorage{columns_}, - table_name(table_name_), database_name(database_name_), global_context(context_), - topics(global_context.getMacros()->expand(topics_)), - brokers(global_context.getMacros()->expand(brokers_)), - group(global_context.getMacros()->expand(group_)), - format_name(global_context.getMacros()->expand(format_name_)), - row_delimiter(row_delimiter_), - schema_name(global_context.getMacros()->expand(schema_name_)), - num_consumers(num_consumers_), max_block_size(max_block_size_), log(&Logger::get("StorageKafka (" + table_name_ + ")")), - semaphore(0, num_consumers_), - skip_broken(skip_broken_), intermediate_commit(intermediate_commit_) + : IStorage( + columns_, + ColumnsDescription({{"_topic", std::make_shared<DataTypeString>()}, + {"_key", std::make_shared<DataTypeString>()}, + {"_offset", std::make_shared<DataTypeUInt64>()}}, true)) + , table_name(table_name_) + , database_name(database_name_) + , global_context(context_) + , topics(global_context.getMacros()->expand(topics_)) + , brokers(global_context.getMacros()->expand(brokers_)) + , group(global_context.getMacros()->expand(group_)) + , format_name(global_context.getMacros()->expand(format_name_)) + , row_delimiter(row_delimiter_) + , schema_name(global_context.getMacros()->expand(schema_name_)) + , num_consumers(num_consumers_) + , max_block_size(max_block_size_) + , log(&Logger::get("StorageKafka (" + table_name_ + ")")) + , semaphore(0, num_consumers_) + , skip_broken(skip_broken_) + , intermediate_commit(intermediate_commit_) { task = global_context.getSchedulePool().createTask(log->name(), [this]{ streamThread(); }); task->deactivate(); @@ -92,14 +109,12 @@ BlockInputStreams StorageKafka::read( const Names & column_names, - const SelectQueryInfo & /*query_info*/, + const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, unsigned /* num_streams */) { - check(column_names); - if (num_created_consumers == 0) return BlockInputStreams(); @@ -113,7 +128,7 @@ BlockInputStreams StorageKafka::read( /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block /// TODO: probably that leads to awful performance. /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages.
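/// Illustrative usage (the table name is an assumption): SELECT _topic, _key, _offset FROM kafka_queue LIMIT 10;
/// the virtual columns are filled per message in KafkaBlockInputStream::readImpl().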
- streams.emplace_back(std::make_shared(*this, context, schema_name, 1)); + streams.emplace_back(std::make_shared(*this, context, column_names, 1)); } LOG_DEBUG(log, "Starting reading " << streams.size() << " streams"); @@ -161,55 +176,22 @@ void StorageKafka::shutdown() } +void StorageKafka::rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) +{ + table_name = new_table_name; + database_name = new_database_name; +} + + void StorageKafka::updateDependencies() { task->activateAndSchedule(); } -cppkafka::Configuration StorageKafka::createConsumerConfiguration() -{ - cppkafka::Configuration conf; - - LOG_TRACE(log, "Setting brokers: " << brokers); - conf.set("metadata.broker.list", brokers); - - LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); - conf.set("group.id", group); - - conf.set("client.id", VERSION_FULL); - - // If no offset stored for this group, read all messages from the start - conf.set("auto.offset.reset", "smallest"); - - // We manually commit offsets after a stream successfully finished - conf.set("enable.auto.commit", "false"); - - // Ignore EOF messages - conf.set("enable.partition.eof", "false"); - - // for debug logs inside rdkafka - // conf.set("debug", "consumer,cgrp,topic,fetch"); - - // Update consumer configuration from the configuration - const auto & config = global_context.getConfigRef(); - if (config.has(CONFIG_PREFIX)) - loadFromConfig(conf, config, CONFIG_PREFIX); - - // Update consumer topic-specific configuration - for (const auto & topic : topics) - { - const auto topic_config_key = CONFIG_PREFIX + "_" + topic; - if (config.has(topic_config_key)) - loadFromConfig(conf, config, topic_config_key); - } - - return conf; -} - BufferPtr StorageKafka::createBuffer() { - // Create a consumer. 
+ // Create a consumer and subscribe to topics auto consumer = std::make_shared(createConsumerConfiguration()); // Limit the number of batched messages to allow early cancellations @@ -253,6 +235,47 @@ void StorageKafka::pushBuffer(BufferPtr buffer) semaphore.set(); } + +cppkafka::Configuration StorageKafka::createConsumerConfiguration() +{ + cppkafka::Configuration conf; + + LOG_TRACE(log, "Setting brokers: " << brokers); + conf.set("metadata.broker.list", brokers); + + LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); + conf.set("group.id", group); + + conf.set("client.id", VERSION_FULL); + + // If no offset stored for this group, read all messages from the start + conf.set("auto.offset.reset", "smallest"); + + // We manually commit offsets after a stream successfully finished + conf.set("enable.auto.commit", "false"); + + // Ignore EOF messages + conf.set("enable.partition.eof", "false"); + + // for debug logs inside rdkafka + // conf.set("debug", "consumer,cgrp,topic,fetch"); + + // Update consumer configuration from the configuration + const auto & config = global_context.getConfigRef(); + if (config.has(CONFIG_PREFIX)) + loadFromConfig(conf, config, CONFIG_PREFIX); + + // Update consumer topic-specific configuration + for (const auto & topic : topics) + { + const auto topic_config_key = CONFIG_PREFIX + "_" + topic; + if (config.has(topic_config_key)) + loadFromConfig(conf, config, topic_config_key); + } + + return conf; +} + bool StorageKafka::checkDependencies(const String & current_database_name, const String & current_table_name) { // Check if all dependencies are attached @@ -321,19 +344,23 @@ bool StorageKafka::streamToViews() auto insert = std::make_shared(); insert->database = database_name; insert->table = table_name; - insert->no_destination = true; // Only insert into dependent views + insert->no_destination = true; // Only insert into dependent views and expect that input blocks contain virtual columns const Settings & settings = global_context.getSettingsRef(); size_t block_size = max_block_size; if (block_size == 0) block_size = settings.max_block_size.value; + // Create a stream for each consumer and join them in a union stream + InterpreterInsertQuery interpreter{insert, global_context}; + auto block_io = interpreter.execute(); + // Create a stream for each consumer and join them in a union stream BlockInputStreams streams; streams.reserve(num_created_consumers); for (size_t i = 0; i < num_created_consumers; ++i) { - auto stream = std::make_shared(*this, global_context, schema_name, block_size); + auto stream = std::make_shared(*this, global_context, block_io.out->getHeader().getNames(), block_size); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL @@ -350,9 +377,6 @@ bool StorageKafka::streamToViews() else in = streams[0]; - // Execute the query - InterpreterInsertQuery interpreter{insert, global_context}; - auto block_io = interpreter.execute(); copyData(*in, *block_io.out, &stream_cancelled); // Check whether the limits were applied during query execution diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index 3a40e29a03e..f9b6609def5 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -20,9 +20,6 @@ namespace DB */ class StorageKafka : public ext::shared_ptr_helper, public IStorage { - friend class KafkaBlockInputStream; - friend class KafkaBlockOutputStream; - public: std::string getName() const override { return 
"Kafka"; } std::string getTableName() const override { return table_name; } @@ -39,14 +36,31 @@ public: size_t max_block_size, unsigned num_streams) override; - void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override - { - table_name = new_table_name; - database_name = new_database_name; - } + void rename(const String & /* new_path_to_db */, const String & new_database_name, const String & new_table_name) override; void updateDependencies() override; + BufferPtr createBuffer(); + BufferPtr claimBuffer(); + BufferPtr tryClaimBuffer(long wait_ms); + void pushBuffer(BufferPtr buf); + + const auto & getTopics() const { return topics; } + const auto & getFormatName() const { return format_name; } + const auto & getSchemaName() const { return schema_name; } + const auto & skipBroken() const { return skip_broken; } + +protected: + StorageKafka( + const std::string & table_name_, + const std::string & database_name_, + Context & context_, + const ColumnsDescription & columns_, + const String & brokers_, const String & group_, const Names & topics_, + const String & format_name_, char row_delimiter_, const String & schema_name_, + size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, + bool intermediate_commit_); + private: // Configuration and state String table_name; @@ -56,18 +70,15 @@ private: const String brokers; const String group; const String format_name; - // Optional row delimiter for generating char delimited stream - // in order to make various input stream parsers happy. - char row_delimiter; + char row_delimiter; /// optional row delimiter for generating char delimited stream in order to make various input stream parsers happy. const String schema_name; - /// Total number of consumers - size_t num_consumers; - /// Maximum block size for insertion into this table - UInt64 max_block_size; - /// Number of actually created consumers. + size_t num_consumers; /// total number of consumers + UInt64 max_block_size; /// maximum block size for insertion into this table + /// Can differ from num_consumers in case of exception in startup() (or if startup() hasn't been called). /// In this case we still need to be able to shutdown() properly. - size_t num_created_consumers = 0; + size_t num_created_consumers = 0; /// number of actually created consumers. 
+ Poco::Logger * log; // Consumer list @@ -84,25 +95,10 @@ private: std::atomic stream_cancelled{false}; cppkafka::Configuration createConsumerConfiguration(); - BufferPtr createBuffer(); - BufferPtr claimBuffer(); - BufferPtr tryClaimBuffer(long wait_ms); - void pushBuffer(BufferPtr buf); void streamThread(); bool streamToViews(); bool checkDependencies(const String & database_name, const String & table_name); - -protected: - StorageKafka( - const std::string & table_name_, - const std::string & database_name_, - Context & context_, - const ColumnsDescription & columns_, - const String & brokers_, const String & group_, const Names & topics_, - const String & format_name_, char row_delimiter_, const String & schema_name_, - size_t num_consumers_, UInt64 max_block_size_, size_t skip_broken, - bool intermediate_commit_); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2131067df28..dfbd9c0e246 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -518,7 +518,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( RangesInDataParts parts_with_ranges; - std::vector> useful_indices; + std::vector> useful_indices; for (const auto & index : data.skip_indices) { auto condition = index->createIndexCondition(query_info, context); @@ -998,7 +998,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index a949d593904..d38d00d055b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -84,7 +84,7 @@ private: MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index, - IndexConditionPtr condition, + MergeTreeIndexConditionPtr condition, MergeTreeData::DataPartPtr part, const MarkRanges & ranges, const Settings & settings) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp new file mode 100644 index 00000000000..760721b5f3c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -0,0 +1,62 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int ILLEGAL_COLUMN; +} + +MergeTreeIndexAggregatorBloomFilter::MergeTreeIndexAggregatorBloomFilter( + size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_) + : bits_per_row(bits_per_row_), hash_functions(hash_functions_), index_columns_name(columns_name_) +{ +} + +bool MergeTreeIndexAggregatorBloomFilter::empty() const +{ + return !total_rows; +} + +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset() +{ + const auto granule = std::make_shared(bits_per_row, hash_functions, total_rows, granule_index_blocks); + total_rows = 0; + granule_index_blocks.clear(); + return granule; +} + +void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * pos, size_t limit) +{ + if 
(*pos >= block.rows()) + throw Exception("The provided position is not less than the number of block rows. Position: " + toString(*pos) + ", Block rows: " + + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + + Block granule_index_block; + size_t max_read_rows = std::min(block.rows() - *pos, limit); + + for (size_t index = 0; index < index_columns_name.size(); ++index) + { + const auto & column_and_type = block.getByName(index_columns_name[index]); + const auto & index_column = BloomFilterHash::hashWithColumn(column_and_type.type, column_and_type.column, *pos, max_read_rows); + + granule_index_block.insert({std::move(index_column), std::make_shared(), column_and_type.name}); + } + + *pos += max_read_rows; + total_rows += max_read_rows; + granule_index_blocks.push_back(granule_index_block); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h new file mode 100644 index 00000000000..ebbe9865313 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexAggregatorBloomFilter : public IMergeTreeIndexAggregator +{ +public: + MergeTreeIndexAggregatorBloomFilter(size_t bits_per_row_, size_t hash_functions_, const Names & columns_name_); + + bool empty() const override; + + MergeTreeIndexGranulePtr getGranuleAndReset() override; + + void update(const Block & block, size_t * pos, size_t limit) override; + +private: + size_t bits_per_row; + size_t hash_functions; + const Names index_columns_name; + + size_t total_rows = 0; + Blocks granule_index_blocks; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp new file mode 100644 index 00000000000..b86da56649d --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +MergeTreeIndexBloomFilter::MergeTreeIndexBloomFilter( + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, const Block & header_, + size_t granularity_, size_t bits_per_row_, size_t hash_functions_) + : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_), bits_per_row(bits_per_row_), + hash_functions(hash_functions_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const +{ + return std::make_shared(bits_per_row, hash_functions, columns.size()); +} + +bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const +{ + const String & column_name = node->getColumnName(); + + for (const auto & name : columns) + if (column_name == name) + return true; + + if (const auto * func = typeid_cast(node.get())) + { + for (const auto & children : func->arguments->children) + if (mayBenefitFromIndexForIn(children)) + return true; + } + + return false; +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator() const +{ + return std::make_shared(bits_per_row, hash_functions, columns); +} + +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const +{ 
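+ /// One condition object is built per SELECT and reused for every granule: it answers whether a granule
+ /// may contain matching rows, using the bloom filters stored in that granule
+ /// (implemented in MergeTreeIndexConditionBloomFilter.cpp below).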
+ return std::make_shared(query_info, context, header, hash_functions); +} + +static void assertIndexColumnsType(const Block & header) +{ + if (!header || !header.columns()) + throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + + const DataTypes & columns_data_types = header.getDataTypes(); + + for (size_t index = 0; index < columns_data_types.size(); ++index) + { + WhichDataType which(columns_data_types[index]); + + if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && + !which.isDateOrDateTime() && !which.isEnum()) + throw Exception("Unexpected type " + columns_data_types[index]->getName() + " of bloom filter index.", + ErrorCodes::ILLEGAL_COLUMN); + } +} + +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, std::shared_ptr node, const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have unique name.", ErrorCodes::INCORRECT_QUERY); + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + + auto syntax = SyntaxAnalyzer(context, {}).analyze(expr_list, columns); + auto index_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + auto index_sample = ExpressionAnalyzer(expr_list, syntax, context).getActions(true)->getSampleBlock(); + + assertIndexColumnsType(index_sample); + + double max_conflict_probability = 0.025; + if (node->type->arguments && !node->type->arguments->children.empty()) + max_conflict_probability = typeid_cast(*node->type->arguments->children[0]).value.get(); + + const auto & bits_per_row_and_size_of_hash_functions = BloomFilterHash::calculationBestPractices(max_conflict_probability); + + return std::make_unique( + node->name, std::move(index_expr), index_sample.getNames(), index_sample.getDataTypes(), index_sample, node->granularity, + bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h new file mode 100644 index 00000000000..2b89b9bddfa --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexBloomFilter : public IMergeTreeIndex +{ +public: + MergeTreeIndexBloomFilter( + const String & name_, const ExpressionActionsPtr & expr_, const Names & columns_, const DataTypes & data_types_, + const Block & header_, size_t granularity_, size_t bits_per_row_, size_t hash_functions_); + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + MergeTreeIndexAggregatorPtr createIndexAggregator() const override; + + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, const Context & context) const override; + + bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; + +private: + size_t bits_per_row; + size_t hash_functions; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp new file mode 100644 index 00000000000..9c8a9d4b41c --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -0,0 +1,352 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +PreparedSetKey 
getPreparedSetKey(const ASTPtr & node, const DataTypePtr & data_type) +{ + /// If the data type is a tuple, let's try to unbox it once + if (node->as() || node->as()) + return PreparedSetKey::forSubquery(*node); + + if (const auto * data_type_tuple = typeid_cast(&*data_type)) + return PreparedSetKey::forLiteral(*node, data_type_tuple->getElements()); + + return PreparedSetKey::forLiteral(*node, DataTypes(1, data_type)); +} + +ColumnWithTypeAndName getPreparedSetInfo(const SetPtr & prepared_set) +{ + if (prepared_set->getDataTypes().size() == 1) + return {prepared_set->getSetElements()[0], prepared_set->getDataTypes()[0], "dummy"}; + + return {ColumnTuple::create(prepared_set->getSetElements()), std::make_shared(prepared_set->getDataTypes()), "dummy"}; +} + +bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & bloom_filter, size_t hash_functions) +{ + const auto const_column = typeid_cast(hash_column); + const auto non_const_column = typeid_cast(hash_column); + + if (!const_column && !non_const_column) + throw Exception("LOGICAL ERROR: hash column must be Const Column or UInt64 Column.", ErrorCodes::LOGICAL_ERROR); + + if (const_column) + { + for (size_t index = 0; index < hash_functions; ++index) + if (!bloom_filter->findHashWithSeed(const_column->getValue(), BloomFilterHash::bf_hash_seed[index])) + return false; + return true; + } + else + { + bool missing_rows = true; + const ColumnUInt64::Container & data = non_const_column->getData(); + + for (size_t index = 0, size = data.size(); missing_rows && index < size; ++index) + { + bool match_row = true; + for (size_t hash_index = 0; match_row && hash_index < hash_functions; ++hash_index) + match_row = bloom_filter->findHashWithSeed(data[index], BloomFilterHash::bf_hash_seed[hash_index]); + + missing_rows = !match_row; + } + + return !missing_rows; + } +} + +} + +MergeTreeIndexConditionBloomFilter::MergeTreeIndexConditionBloomFilter( + const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions) + : header(header), context(context), query_info(info), hash_functions(hash_functions) +{ + auto atomFromAST = [this](auto & node, auto &, auto & constants, auto & out) { return traverseAtomAST(node, constants, out); }; + rpn = std::move(RPNBuilder(info, context, atomFromAST).extractRPN()); +} + +bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const +{ + std::vector rpn_stack; + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN + || element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.push_back(true); + } + else if (element.function == RPNElement::FUNCTION_EQUALS + || element.function == RPNElement::FUNCTION_NOT_EQUALS + || element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.push_back(false); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + // do nothing: negation keeps "unknown or true" unchanged + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 && arg2; + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 || arg2; + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + return rpn_stack[0]; +} + +bool 
MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const +{ + std::vector rpn_stack; + const auto & filters = granule->getFilters(); + + for (const auto & element : rpn) + { + if (element.function == RPNElement::FUNCTION_UNKNOWN) + { + rpn_stack.emplace_back(true, true); + } + else if (element.function == RPNElement::FUNCTION_IN + || element.function == RPNElement::FUNCTION_NOT_IN + || element.function == RPNElement::FUNCTION_EQUALS + || element.function == RPNElement::FUNCTION_NOT_EQUALS) + { + bool match_rows = true; + const auto & predicate = element.predicate; + for (size_t index = 0; match_rows && index < predicate.size(); ++index) + { + const auto & query_index_hash = predicate[index]; + const auto & filter = filters[query_index_hash.first]; + const ColumnPtr & hash_column = query_index_hash.second; + match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions); + } + + rpn_stack.emplace_back(match_rows, !match_rows); + if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN) + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_NOT) + { + rpn_stack.back() = !rpn_stack.back(); + } + else if (element.function == RPNElement::FUNCTION_OR) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 | arg2; + } + else if (element.function == RPNElement::FUNCTION_AND) + { + auto arg1 = rpn_stack.back(); + rpn_stack.pop_back(); + auto arg2 = rpn_stack.back(); + rpn_stack.back() = arg1 & arg2; + } + else if (element.function == RPNElement::ALWAYS_TRUE) + { + rpn_stack.emplace_back(true, false); + } + else if (element.function == RPNElement::ALWAYS_FALSE) + { + rpn_stack.emplace_back(false, true); + } + else + throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + } + + if (rpn_stack.size() != 1) + throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + + return rpn_stack[0].can_be_true; +} + +bool MergeTreeIndexConditionBloomFilter::traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out) +{ + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) + { + if (const_value.getType() == Field::Types::UInt64 || const_value.getType() == Field::Types::Int64 || + const_value.getType() == Field::Types::Float64) + { + /// Zero in all types is represented in memory the same way as in UInt64. + out.function = const_value.get() ? 
RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + return true; + } + } + } + + if (const auto * function = node->as()) + { + const ASTs & arguments = function->arguments->children; + + if (arguments.size() != 2) + return false; + + if (functionIsInOrGlobalInOperator(function->name)) + { + if (const auto & prepared_set = getPreparedSet(arguments[1])) + return traverseASTIn(function->name, arguments[0], prepared_set, out); + } + else if (function->name == "equals" || function->name == "notEquals") + { + Field const_value; + DataTypePtr const_type; + if (KeyCondition::getConstant(arguments[1], block_with_constants, const_value, const_type)) + return traverseASTEquals(function->name, arguments[0], const_type, const_value, out); + else if (KeyCondition::getConstant(arguments[0], block_with_constants, const_value, const_type)) + return traverseASTEquals(function->name, arguments[1], const_type, const_value, out); + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out) +{ + const auto & prepared_info = getPreparedSetInfo(prepared_set); + return traverseASTIn(function_name, key_ast, prepared_info.type, prepared_info.column, out); +} + +bool MergeTreeIndexConditionBloomFilter::traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + size_t row_size = column->size(); + size_t position = header.getPositionByName(key_ast->getColumnName()); + const DataTypePtr & index_type = header.getByPosition(position).type; + const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); + + if (function_name == "in" || function_name == "globalIn") + out.function = RPNElement::FUNCTION_IN; + + if (function_name == "notIn" || function_name == "globalNotIn") + out.function = RPNElement::FUNCTION_NOT_IN; + + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(type); + + if (which.isTuple() && function->name == "tuple") + { + const auto & tuple_column = typeid_cast(column.get()); + const auto & tuple_data_type = typeid_cast(type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; + + if (tuple_data_type->getElements().size() != arguments.size() || tuple_column->getColumns().size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + bool match_with_subtype = false; + const auto & sub_columns = tuple_column->getColumns(); + const auto & sub_data_types = tuple_data_type->getElements(); + + for (size_t index = 0; index < arguments.size(); ++index) + match_with_subtype |= traverseASTIn(function_name, arguments[index], sub_data_types[index], sub_columns[index], out); + + return match_with_subtype; + } + } + + return false; +} + +bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out) +{ + if (header.has(key_ast->getColumnName())) + { + size_t position = header.getPositionByName(key_ast->getColumnName()); + const DataTypePtr & index_type = header.getByPosition(position).type; + Field 
converted_field = convertFieldToType(value_field, *index_type, &*value_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field))); + out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; + return true; + } + + if (const auto * function = key_ast->as()) + { + WhichDataType which(value_type); + + if (which.isTuple() && function->name == "tuple") + { + const TupleBackend & tuple = get(value_field).toUnderType(); + const auto value_tuple_data_type = typeid_cast(value_type.get()); + const ASTs & arguments = typeid_cast(*function->arguments).children; + + if (tuple.size() != arguments.size()) + throw Exception("Illegal types of arguments of function " + function_name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + bool match_with_subtype = false; + const DataTypes & subtypes = value_tuple_data_type->getElements(); + + for (size_t index = 0; index < tuple.size(); ++index) + match_with_subtype |= traverseASTEquals(function_name, arguments[index], subtypes[index], tuple[index], out); + + return match_with_subtype; + } + } + + return false; +} + +SetPtr MergeTreeIndexConditionBloomFilter::getPreparedSet(const ASTPtr & node) +{ + if (header.has(node->getColumnName())) + { + const auto & column_and_type = header.getByName(node->getColumnName()); + const auto & prepared_set_it = query_info.sets.find(getPreparedSetKey(node, column_and_type.type)); + + if (prepared_set_it != query_info.sets.end() && prepared_set_it->second->hasExplicitSetElements()) + return prepared_set_it->second; + } + else + { + for (const auto & prepared_set_it : query_info.sets) + if (prepared_set_it.first.ast_hash == node->getTreeHash() && prepared_set_it.second->hasExplicitSetElements()) + return prepared_set_it.second; + } + + return DB::SetPtr(); +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h new file mode 100644 index 00000000000..6c268cadbb6 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class MergeTreeIndexConditionBloomFilter : public IMergeTreeIndexCondition +{ +public: + struct RPNElement + { + enum Function + { + /// Atoms of a Boolean expression. + FUNCTION_EQUALS, + FUNCTION_NOT_EQUALS, + FUNCTION_IN, + FUNCTION_NOT_IN, + FUNCTION_UNKNOWN, /// Can take any value. + /// Operators of the logical expression. 
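+ /// Combine the atom results during evaluation; e.g. `x = 1 AND y IN (2, 3)`
+ /// arrives here as the RPN [FUNCTION_EQUALS, FUNCTION_IN, FUNCTION_AND].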
+ FUNCTION_NOT, + FUNCTION_AND, + FUNCTION_OR, + /// Constants + ALWAYS_FALSE, + ALWAYS_TRUE, + }; + + RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} + + Function function = FUNCTION_UNKNOWN; + std::vector> predicate; + }; + + MergeTreeIndexConditionBloomFilter(const SelectQueryInfo & info, const Context & context, const Block & header, size_t hash_functions); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override + { + if (const auto & bf_granule = typeid_cast(granule.get())) + return mayBeTrueOnGranule(bf_granule); + + throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); + } + +private: + const Block & header; + const Context & context; + const SelectQueryInfo & query_info; + const size_t hash_functions; + std::vector rpn; + + SetPtr getPreparedSet(const ASTPtr & node); + + bool mayBeTrueOnGranule(const MergeTreeIndexGranuleBloomFilter * granule) const; + + bool traverseAtomAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); + + bool traverseASTIn(const String & function_name, const ASTPtr & key_ast, const SetPtr & prepared_set, RPNElement & out); + + bool traverseASTIn( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & type, const ColumnPtr & column, RPNElement & out); + + bool traverseASTEquals( + const String & function_name, const ASTPtr & key_ast, const DataTypePtr & value_type, const Field & value_field, RPNElement & out); +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 9c51428f0a2..895764339e5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -31,7 +31,7 @@ namespace ErrorCodes /// Adds all tokens from string to bloom filter. static void stringToBloomFilter( - const char * data, size_t size, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const char * data, size_t size, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; size_t token_start = 0; @@ -42,7 +42,7 @@ static void stringToBloomFilter( /// Adds all tokens from like pattern string to bloom filter. (Because like pattern can contain `\%` and `\_`.) 
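+/// Roughly: escaped `\%` and `\_` are taken literally while unescaped wildcards
+/// terminate the current token, so only the literal runs of the pattern need to
+/// be present in the filter for a granule to stay a candidate.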
static void likeStringToBloomFilter( - const String & data, const std::unique_ptr & token_extractor, StringBloomFilter & bloom_filter) + const String & data, const std::unique_ptr & token_extractor, BloomFilter & bloom_filter) { size_t cur = 0; String token; @@ -51,24 +51,23 @@ static void likeStringToBloomFilter( } -MergeTreeBloomFilterIndexGranule::MergeTreeBloomFilterIndexGranule(const MergeTreeBloomFilterIndex & index) +MergeTreeIndexGranuleFullText::MergeTreeIndexGranuleFullText(const MergeTreeIndexFullText & index) : IMergeTreeIndexGranule() , index(index) , bloom_filters( - index.columns.size(), StringBloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) + index.columns.size(), BloomFilter(index.bloom_filter_size, index.bloom_filter_hashes, index.seed)) , has_elems(false) {} -void MergeTreeBloomFilterIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleFullText::serializeBinary(WriteBuffer & ostr) const { if (empty()) - throw Exception( - "Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception("Attempt to write empty minmax index " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); for (const auto & bloom_filter : bloom_filters) ostr.write(reinterpret_cast(bloom_filter.getFilter().data()), index.bloom_filter_size); } -void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr) { for (auto & bloom_filter : bloom_filters) { @@ -78,17 +77,17 @@ void MergeTreeBloomFilterIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeBloomFilterIndexAggregator::MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index) - : index(index), granule(std::make_shared(index)) {} +MergeTreeIndexAggregatorFullText::MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index) + : index(index), granule(std::make_shared(index)) {} -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() { - auto new_granule = std::make_shared(index); + auto new_granule = std::make_shared(index); new_granule.swap(granule); return new_granule; } -void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -111,14 +110,14 @@ void MergeTreeBloomFilterIndexAggregator::update(const Block & block, size_t * p } -const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map +const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map { { "notEquals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_NOT_EQUALS; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -128,10 +127,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "equals", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_EQUALS; - out.bloom_filter = std::make_unique( + out.bloom_filter = 
std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -141,10 +140,10 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "like", - [] (RPNElement & out, const Field & value, const MergeTreeBloomFilterIndex & idx) + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { out.function = RPNElement::FUNCTION_LIKE; - out.bloom_filter = std::make_unique( + out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); const auto & str = value.get(); @@ -154,7 +153,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "notIn", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_NOT_IN; return true; @@ -162,7 +161,7 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, { "in", - [] (RPNElement & out, const Field &, const MergeTreeBloomFilterIndex &) + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) { out.function = RPNElement::FUNCTION_IN; return true; @@ -170,24 +169,21 @@ const BloomFilterCondition::AtomMap BloomFilterCondition::atom_map }, }; -BloomFilterCondition::BloomFilterCondition( +MergeTreeConditionFullText::MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_) : index(index_), prepared_sets(query_info.sets) + const MergeTreeIndexFullText & index_) : index(index_), prepared_sets(query_info.sets) { rpn = std::move( RPNBuilder( query_info, context, - [this] (const ASTPtr & node, - const Context & /* context */, - Block & block_with_constants, - RPNElement & out) -> bool + [this] (const ASTPtr & node, const Context & /* context */, Block & block_with_constants, RPNElement & out) -> bool { return this->atomFromAST(node, block_with_constants, out); }).extractRPN()); } -bool BloomFilterCondition::alwaysUnknownOrTrue() const +bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. 
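+ /// Stack entries mean "this subexpression is always unknown-or-true": atoms the
+ /// index understands push false, unknown atoms push true, and AND/OR combine
+ /// them with &&/||. If true reaches the top, the index is useless for this query.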
std::vector rpn_stack; @@ -234,10 +230,10 @@ bool BloomFilterCondition::alwaysUnknownOrTrue() const return rpn_stack[0]; } -bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "BloomFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -323,7 +319,7 @@ bool BloomFilterCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granu return rpn_stack[0].can_be_true; } -bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) +bool MergeTreeConditionFullText::getKey(const ASTPtr & node, size_t & key_column_num) { auto it = std::find(index.columns.begin(), index.columns.end(), node->getColumnName()); if (it == index.columns.end()) @@ -333,7 +329,7 @@ bool BloomFilterCondition::getKey(const ASTPtr & node, size_t & key_column_num) return true; } -bool BloomFilterCondition::atomFromAST( +bool MergeTreeConditionFullText::atomFromAST( const ASTPtr & node, Block & block_with_constants, RPNElement & out) { Field const_value; @@ -399,7 +395,7 @@ bool BloomFilterCondition::atomFromAST( return false; } -bool BloomFilterCondition::tryPrepareSetBloomFilter( +bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( const ASTs & args, RPNElement & out) { @@ -454,7 +450,7 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; - std::vector> bloom_filters; + std::vector> bloom_filters; std::vector key_position; Columns columns = prepared_set->getSetElements(); @@ -480,23 +476,23 @@ bool BloomFilterCondition::tryPrepareSetBloomFilter( } -MergeTreeIndexGranulePtr MergeTreeBloomFilterIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeBloomFilterIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeBloomFilterIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeBloomFilterIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const { return std::find(std::cbegin(columns), std::cend(columns), node->getColumnName()) != std::cend(columns); } @@ -679,7 +675,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(n); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, bloom_filter_hashes, seed, std::move(tokenizer)); } @@ -697,7 +693,7 @@ std::unique_ptr bloomFilterIndexCreator( auto tokenizer = std::make_unique(); - return std::make_unique( + return std::make_unique( node->name, std::move(index_expr), columns, data_types, sample, node->granularity, bloom_filter_size, 
bloom_filter_hashes, seed, std::move(tokenizer)); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h similarity index 79% rename from dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index 888ffe7f9cc..cd8ac534e64 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBloomFilterIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -10,54 +10,54 @@ namespace DB { -class MergeTreeBloomFilterIndex; +class MergeTreeIndexFullText; -struct MergeTreeBloomFilterIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleFullText : public IMergeTreeIndexGranule { - explicit MergeTreeBloomFilterIndexGranule( - const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexGranuleFullText( + const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexGranule() override = default; + ~MergeTreeIndexGranuleFullText() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return !has_elems; } - const MergeTreeBloomFilterIndex & index; - std::vector bloom_filters; + const MergeTreeIndexFullText & index; + std::vector bloom_filters; bool has_elems; }; -using MergeTreeBloomFilterIndexGranulePtr = std::shared_ptr; +using MergeTreeIndexGranuleFullTextPtr = std::shared_ptr; -struct MergeTreeBloomFilterIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorFullText : IMergeTreeIndexAggregator { - explicit MergeTreeBloomFilterIndexAggregator(const MergeTreeBloomFilterIndex & index); + explicit MergeTreeIndexAggregatorFullText(const MergeTreeIndexFullText & index); - ~MergeTreeBloomFilterIndexAggregator() override = default; + ~MergeTreeIndexAggregatorFullText() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeBloomFilterIndex & index; - MergeTreeBloomFilterIndexGranulePtr granule; + const MergeTreeIndexFullText & index; + MergeTreeIndexGranuleFullTextPtr granule; }; -class BloomFilterCondition : public IIndexCondition +class MergeTreeConditionFullText : public IMergeTreeIndexCondition { public: - BloomFilterCondition( + MergeTreeConditionFullText( const SelectQueryInfo & query_info, const Context & context, - const MergeTreeBloomFilterIndex & index_); + const MergeTreeIndexFullText & index_); - ~BloomFilterCondition() override = default; + ~MergeTreeConditionFullText() override = default; bool alwaysUnknownOrTrue() const override; @@ -93,19 +93,19 @@ private: }; RPNElement( - Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) + Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} Function function = FUNCTION_UNKNOWN; /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE. 
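+ /// key_column is the position of the constrained column within the index
+ /// columns; bloom_filter holds the tokens extracted from the constant operand.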
size_t key_column; - std::unique_ptr bloom_filter; + std::unique_ptr bloom_filter; /// For FUNCTION_IN and FUNCTION_NOT_IN - std::vector> set_bloom_filters; + std::vector> set_bloom_filters; std::vector set_key_position; }; - using AtomMap = std::unordered_map; + using AtomMap = std::unordered_map; using RPN = std::vector; bool atomFromAST(const ASTPtr & node, Block & block_with_constants, RPNElement & out); @@ -115,7 +115,7 @@ private: static const AtomMap atom_map; - const MergeTreeBloomFilterIndex & index; + const MergeTreeIndexFullText & index; RPN rpn; /// Sets from syntax analyzer. PreparedSets prepared_sets; @@ -164,10 +164,10 @@ struct SplitTokenExtractor : public ITokenExtractor }; -class MergeTreeBloomFilterIndex : public IMergeTreeIndex +class MergeTreeIndexFullText : public IMergeTreeIndex { public: - MergeTreeBloomFilterIndex( + MergeTreeIndexFullText( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -184,12 +184,12 @@ public: , seed(seed_) , token_extractor_func(std::move(token_extractor_func_)) {} - ~MergeTreeBloomFilterIndex() override = default; + ~MergeTreeIndexFullText() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp new file mode 100644 index 00000000000..4eee7309811 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -0,0 +1,115 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns) + : bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + total_rows = 0; + bloom_filters.resize(index_columns); +} + +MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( + size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks) + : total_rows(total_rows), bits_per_row(bits_per_row), hash_functions(hash_functions) +{ + if (granule_index_blocks.empty() || !total_rows) + throw Exception("LOGICAL ERROR: granule_index_blocks empty or total_rows is zero.", ErrorCodes::LOGICAL_ERROR); + + assertGranuleBlocksStructure(granule_index_blocks); + + for (size_t index = 0; index < granule_index_blocks.size(); ++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if (unlikely(!granule_index_block || !granule_index_block.rows())) + throw Exception("LOGICAL ERROR: granule_index_block is empty.", ErrorCodes::LOGICAL_ERROR); + + if (index == 0) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + bloom_filters.emplace_back(std::make_shared(bytes_size, hash_functions, 0)); + } + + for (size_t column = 0, columns = granule_index_block.columns(); column < columns; ++column) + fillingBloomFilter(bloom_filters[column], granule_index_block, column); + } +} + +bool MergeTreeIndexGranuleBloomFilter::empty() const +{ + return !total_rows; +} + +void 
MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr) +{ + if (!empty()) + throw Exception("Cannot read data to a non-empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + readVarUInt(total_rows, istr); + for (size_t index = 0; index < bloom_filters.size(); ++index) + { + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + bloom_filters[index] = std::make_shared(bytes_size, hash_functions, 0); + istr.read(reinterpret_cast(bloom_filters[index]->getFilter().data()), bytes_size); + } +} + +void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception("Attempt to write empty bloom filter index.", ErrorCodes::LOGICAL_ERROR); + + static size_t atom_size = 8; + writeVarUInt(total_rows, ostr); + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + for (const auto & bloom_filter : bloom_filters) + ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), bytes_size); +} + +void MergeTreeIndexGranuleBloomFilter::assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const +{ + Block prev_block; + for (size_t index = 0; index < granule_index_blocks.size(); ++index) + { + Block granule_index_block = granule_index_blocks[index]; + + if (index != 0) + assertBlocksHaveEqualStructure(prev_block, granule_index_block, "Granule blocks of bloom filter has difference structure."); + + prev_block = granule_index_block; + } +} + +void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) +{ + const auto & column = granule_index_block.getByPosition(index_hash_column); + + if (const auto hash_column = typeid_cast(column.column.get())) + { + const auto & hash_column_vec = hash_column->getData(); + + for (size_t index = 0, size = hash_column_vec.size(); index < size; ++index) + { + const UInt64 & bf_base_hash = hash_column_vec[index]; + + for (size_t i = 0; i < hash_functions; ++i) + bf->addHashWithSeed(bf_base_hash, BloomFilterHash::bf_hash_seed[i]); + } + } +} + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h new file mode 100644 index 00000000000..79670678e79 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class MergeTreeIndexGranuleBloomFilter : public IMergeTreeIndexGranule +{ +public: + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t index_columns); + + MergeTreeIndexGranuleBloomFilter(size_t bits_per_row, size_t hash_functions, size_t total_rows, const Blocks & granule_index_blocks); + + bool empty() const override; + + void serializeBinary(WriteBuffer & ostr) const override; + + void deserializeBinary(ReadBuffer & istr) override; + + const std::vector getFilters() const { return bloom_filters; } + +private: + size_t total_rows; + size_t bits_per_row; + size_t hash_functions; + std::vector bloom_filters; + + void assertGranuleBlocksStructure(const Blocks & granule_index_blocks) const; + + void fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column); +}; + + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp similarity index 74% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp rename to 
dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 32baa186269..0d9c4722a25 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -16,14 +16,14 @@ namespace ErrorCodes } -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index) : IMergeTreeIndexGranule(), index(index), parallelogram() {} -MergeTreeMinMaxGranule::MergeTreeMinMaxGranule( - const MergeTreeMinMaxIndex & index, std::vector && parallelogram) +MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax( + const MergeTreeIndexMinMax & index, std::vector && parallelogram) : IMergeTreeIndexGranule(), index(index), parallelogram(std::move(parallelogram)) {} -void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -50,7 +50,7 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr) { parallelogram.clear(); Field min_val; @@ -83,15 +83,15 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeMinMaxAggregator::MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index) +MergeTreeIndexAggregatorMinMax::MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index) : index(index) {} -MergeTreeIndexGranulePtr MergeTreeMinMaxAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset() { - return std::make_shared(index, std::move(parallelogram)); + return std::make_shared(index, std::move(parallelogram)); } -void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -122,21 +122,21 @@ void MergeTreeMinMaxAggregator::update(const Block & block, size_t * pos, size_t } -MinMaxCondition::MinMaxCondition( +MergeTreeIndexConditionMinMax::MergeTreeIndexConditionMinMax( const SelectQueryInfo &query, const Context &context, - const MergeTreeMinMaxIndex &index) - : IIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} + const MergeTreeIndexMinMax &index) + : IMergeTreeIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} -bool MinMaxCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionMinMax::alwaysUnknownOrTrue() const { return condition.alwaysUnknownOrTrue(); } -bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -147,25 +147,25 @@ bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c } -MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexMinMax::createIndexGranule() const { - return std::make_shared(*this); + 
return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeMinMaxIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexMinMax::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeMinMaxIndex::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const { const String column_name = node->getColumnName(); @@ -210,7 +210,7 @@ std::unique_ptr minmaxIndexCreator( data_types.emplace_back(column.type); } - return std::make_unique( + return std::make_unique( node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h similarity index 59% rename from dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 06be8fe0cdd..5b514cdc738 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -10,62 +10,62 @@ namespace DB { -class MergeTreeMinMaxIndex; +class MergeTreeIndexMinMax; -struct MergeTreeMinMaxGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleMinMax : public IMergeTreeIndexGranule { - explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); - MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index, std::vector && parallelogram); - ~MergeTreeMinMaxGranule() override = default; + explicit MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index); + MergeTreeIndexGranuleMinMax(const MergeTreeIndexMinMax & index, std::vector && parallelogram); + ~MergeTreeIndexGranuleMinMax() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; bool empty() const override { return parallelogram.empty(); } - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -struct MergeTreeMinMaxAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorMinMax : IMergeTreeIndexAggregator { - explicit MergeTreeMinMaxAggregator(const MergeTreeMinMaxIndex & index); - ~MergeTreeMinMaxAggregator() override = default; + explicit MergeTreeIndexAggregatorMinMax(const MergeTreeIndexMinMax & index); + ~MergeTreeIndexAggregatorMinMax() override = default; bool empty() const override { return parallelogram.empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; std::vector parallelogram; }; -class MinMaxCondition : public IIndexCondition +class MergeTreeIndexConditionMinMax : public IMergeTreeIndexCondition { public: - MinMaxCondition( + MergeTreeIndexConditionMinMax( const SelectQueryInfo & query, const Context & context, - const MergeTreeMinMaxIndex & index); + const MergeTreeIndexMinMax & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~MinMaxCondition() override = default; + 
~MergeTreeIndexConditionMinMax() override = default; private: - const MergeTreeMinMaxIndex & index; + const MergeTreeIndexMinMax & index; KeyCondition condition; }; -class MergeTreeMinMaxIndex : public IMergeTreeIndex +class MergeTreeIndexMinMax : public IMergeTreeIndex { public: - MergeTreeMinMaxIndex( + MergeTreeIndexMinMax( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -74,12 +74,12 @@ public: size_t granularity_) : IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {} - ~MergeTreeMinMaxIndex() override = default; + ~MergeTreeIndexMinMax() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp similarity index 87% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 5bf06a1ca6d..8efaae8e579 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -21,18 +21,18 @@ namespace ErrorCodes const Field UNKNOWN_FIELD(3u); -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneEmpty()) {} -MergeTreeSetIndexGranule::MergeTreeSetIndexGranule( - const MergeTreeSetSkippingIndex & index, MutableColumns && mutable_columns) +MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( + const MergeTreeIndexSet & index, MutableColumns && mutable_columns) : IMergeTreeIndexGranule() , index(index) , block(index.header.cloneWithColumns(std::move(mutable_columns))) {} -void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( @@ -64,7 +64,7 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const } } -void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) +void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr) { block.clear(); @@ -94,7 +94,7 @@ void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr) } -MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index) +MergeTreeIndexAggregatorSet::MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index) : index(index), columns(index.header.cloneEmptyColumns()) { ColumnRawPtrs column_ptrs; @@ -111,7 +111,7 @@ MergeTreeSetIndexAggregator::MergeTreeSetIndexAggregator(const MergeTreeSetSkipp columns = index.header.cloneEmptyColumns(); } -void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -164,7 +164,7 @@ void MergeTreeSetIndexAggregator::update(const Block & block, size_t * pos, size } template -bool MergeTreeSetIndexAggregator::buildFilter( +bool MergeTreeIndexAggregatorSet::buildFilter( Method & method, 
const ColumnRawPtrs & column_ptrs, IColumn::Filter & filter, @@ -190,9 +190,9 @@ bool MergeTreeSetIndexAggregator::buildFilter( return has_new_data; } -MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset() { - auto granule = std::make_shared(index, std::move(columns)); + auto granule = std::make_shared(index, std::move(columns)); switch (data.type) { @@ -212,11 +212,11 @@ MergeTreeIndexGranulePtr MergeTreeSetIndexAggregator::getGranuleAndReset() } -SetIndexCondition::SetIndexCondition( +MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex &index) - : IIndexCondition(), index(index) + const MergeTreeIndexSet &index) + : IMergeTreeIndexCondition(), index(index) { for (size_t i = 0, size = index.columns.size(); i < size; ++i) { @@ -253,14 +253,14 @@ SetIndexCondition::SetIndexCondition( actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); } -bool SetIndexCondition::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const { return useless; } -bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - auto granule = std::dynamic_pointer_cast(idx_granule); + auto granule = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); @@ -294,7 +294,7 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) return false; } -void SetIndexCondition::traverseAST(ASTPtr & node) const +void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) { @@ -309,7 +309,7 @@ void SetIndexCondition::traverseAST(ASTPtr & node) const node = std::make_shared(UNKNOWN_FIELD); } -bool SetIndexCondition::atomFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::atomFromAST(ASTPtr & node) const { /// Function, literal or column @@ -340,7 +340,7 @@ bool SetIndexCondition::atomFromAST(ASTPtr & node) const return false; } -bool SetIndexCondition::operatorFromAST(ASTPtr & node) const +bool MergeTreeIndexConditionSet::operatorFromAST(ASTPtr & node) const { /// Functions AND, OR, NOT. Replace with bit*. 
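+ /// Sketch of the idea: set-index granules are checked in three-valued logic
+ /// (a predicate over the stored block may come out true, false, or
+ /// UNKNOWN_FIELD), so plain AND/OR/NOT are rewritten to bitwise helpers that
+ /// propagate "unknown" instead of collapsing it.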
auto * func = node->as(); @@ -416,7 +416,7 @@ static bool checkAtomName(const String & name) return atoms.find(name) != atoms.end(); } -bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const +bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr &node, bool atomic) const { if (const auto * func = node->as()) { @@ -446,23 +446,23 @@ bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const } -MergeTreeIndexGranulePtr MergeTreeSetSkippingIndex::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexSet::createIndexGranule() const { - return std::make_shared(*this); + return std::make_shared(*this); } -MergeTreeIndexAggregatorPtr MergeTreeSetSkippingIndex::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexSet::createIndexAggregator() const { - return std::make_shared(*this); + return std::make_shared(*this); } -IndexConditionPtr MergeTreeSetSkippingIndex::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( const SelectQueryInfo & query, const Context & context) const { - return std::make_shared(query, context, *this); + return std::make_shared(query, context, *this); }; -bool MergeTreeSetSkippingIndex::mayBenefitFromIndexForIn(const ASTPtr &) const +bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const { return false; } @@ -506,7 +506,7 @@ std::unique_ptr setIndexCreator( header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); } - return std::make_unique( + return std::make_unique( node->name, std::move(unique_expr), columns, data_types, header, node->granularity, max_rows); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h similarity index 69% rename from dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h rename to dbms/src/Storages/MergeTree/MergeTreeIndexSet.h index 61d409af589..04f4d2bec1e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -12,12 +12,12 @@ namespace DB { -class MergeTreeSetSkippingIndex; +class MergeTreeIndexSet; -struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleSet : public IMergeTreeIndexGranule { - explicit MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index); - MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index, MutableColumns && columns); + explicit MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index); + MergeTreeIndexGranuleSet(const MergeTreeIndexSet & index, MutableColumns && columns); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr) override; @@ -25,17 +25,17 @@ struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule size_t size() const { return block.rows(); } bool empty() const override { return !size(); } - ~MergeTreeSetIndexGranule() override = default; + ~MergeTreeIndexGranuleSet() override = default; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; Block block; }; -struct MergeTreeSetIndexAggregator : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorSet : IMergeTreeIndexAggregator { - explicit MergeTreeSetIndexAggregator(const MergeTreeSetSkippingIndex & index); - ~MergeTreeSetIndexAggregator() override = default; + explicit MergeTreeIndexAggregatorSet(const MergeTreeIndexSet & index); + ~MergeTreeIndexAggregatorSet() override = default; size_t size() const { 
return data.getTotalRowCount(); } bool empty() const override { return !size(); } @@ -55,26 +55,26 @@ private: size_t limit, ClearableSetVariants & variants) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; ClearableSetVariants data; Sizes key_sizes; MutableColumns columns; }; -class SetIndexCondition : public IIndexCondition +class MergeTreeIndexConditionSet : public IMergeTreeIndexCondition { public: - SetIndexCondition( + MergeTreeIndexConditionSet( const SelectQueryInfo & query, const Context & context, - const MergeTreeSetSkippingIndex & index); + const MergeTreeIndexSet & index); bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; - ~SetIndexCondition() override = default; + ~MergeTreeIndexConditionSet() override = default; private: void traverseAST(ASTPtr & node) const; bool atomFromAST(ASTPtr & node) const; @@ -82,7 +82,7 @@ private: bool checkASTUseless(const ASTPtr &node, bool atomic = false) const; - const MergeTreeSetSkippingIndex & index; + const MergeTreeIndexSet & index; bool useless; std::set key_columns; @@ -91,10 +91,10 @@ private: }; -class MergeTreeSetSkippingIndex : public IMergeTreeIndex +class MergeTreeIndexSet : public IMergeTreeIndex { public: - MergeTreeSetSkippingIndex( + MergeTreeIndexSet( String name_, ExpressionActionsPtr expr_, const Names & columns_, @@ -104,12 +104,12 @@ public: size_t max_rows_) : IMergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {} - ~MergeTreeSetSkippingIndex() override = default; + ~MergeTreeIndexSet() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - IndexConditionPtr createIndexCondition( + MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp index 74eb31ecd46..e19aafbd25d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } -void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator) +void MergeTreeIndexFactory::registerIndex(const std::string & name, Creator creator) { if (!indexes.emplace(name, std::move(creator)).second) throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique", @@ -70,6 +70,11 @@ std::unique_ptr bloomFilterIndexCreator( std::shared_ptr node, const Context & context); +std::unique_ptr bloomFilterIndexCreatorNew( + const NamesAndTypesList & columns, + std::shared_ptr node, + const Context & context); + MergeTreeIndexFactory::MergeTreeIndexFactory() { @@ -77,6 +82,7 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerIndex("set", setIndexCreator); registerIndex("ngrambf_v1", bloomFilterIndexCreator); registerIndex("tokenbf_v1", bloomFilterIndexCreator); + registerIndex("bloom_filter", bloomFilterIndexCreatorNew); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h index b6ee89d87ef..2a00c902810 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -59,17 +59,17 @@ using 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h index b6ee89d87ef..2a00c902810 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -59,17 +59,17 @@ using MergeTreeIndexAggregators = std::vector<MergeTreeIndexAggregatorPtr>; /// Condition on the index. -class IIndexCondition +class IMergeTreeIndexCondition { public: - virtual ~IIndexCondition() = default; + virtual ~IMergeTreeIndexCondition() = default; /// Checks if this index is useful for query. virtual bool alwaysUnknownOrTrue() const = 0; virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0; }; -using IndexConditionPtr = std::shared_ptr<IIndexCondition>; +using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>; /// Structure for storing basic index info like columns, expression, arguments, ... @@ -101,7 +101,7 @@ public: virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0; - virtual IndexConditionPtr createIndexCondition( + virtual MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query_info, const Context & context) const = 0; String name; diff --git a/dbms/src/Storages/MergeTree/RPNBuilder.h b/dbms/src/Storages/MergeTree/RPNBuilder.h index 6a557cb5f6a..d5244c3285d 100644 --- a/dbms/src/Storages/MergeTree/RPNBuilder.h +++ b/dbms/src/Storages/MergeTree/RPNBuilder.h @@ -24,10 +24,7 @@ public: using AtomFromASTFunc = std::function< bool(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)>; - RPNBuilder( - const SelectQueryInfo & query_info, - const Context & context_, - const AtomFromASTFunc & atomFromAST_) + RPNBuilder(const SelectQueryInfo & query_info, const Context & context_, const AtomFromASTFunc & atomFromAST_) : context(context_), atomFromAST(atomFromAST_) { /** Evaluation of expressions that depend only on constants. diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index b23a2eedc0e..138e7c14f9d 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -2,8 +2,8 @@ #include #include #include -#include <Storages/MergeTree/MergeTreeMinMaxIndex.h> -#include <Storages/MergeTree/MergeTreeSetSkippingIndex.h> +#include <Storages/MergeTree/MergeTreeIndexMinMax.h> +#include <Storages/MergeTree/MergeTreeIndexSet.h> #include #include
diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index 1258ebec7e2..b76150611c4 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -254,12 +254,12 @@ void StorageCatBoostPool::createSampleBlockAndColumns() /// Order is important: first numeric columns, then categorial, then all others. for (const auto & column : num_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : cat_columns) - columns.add(DB::ColumnDescription(column.name, column.type)); + columns.add(DB::ColumnDescription(column.name, column.type, false)); for (const auto & column : other_columns) { - DB::ColumnDescription column_desc(column.name, column.type); + DB::ColumnDescription column_desc(column.name, column.type, false); /// We assign Materialized kind to the column so that it doesn't show in SELECT *. /// Because the table is readonly, we do not need default expression. column_desc.default_desc.kind = ColumnDefaultKind::Materialized; @@ -270,7 +270,7 @@ void StorageCatBoostPool::createSampleBlockAndColumns() { if (!desc.alias.empty()) { - DB::ColumnDescription column(desc.alias, get_type(desc.column_type)); + DB::ColumnDescription column(desc.alias, get_type(desc.column_type), false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared<ASTIdentifier>(desc.column_name); columns.add(std::move(column)); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 645ec1e9230..713ca9b7be9 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -51,9 +51,11 @@ StorageMerge::StorageMerge( const String & source_database_, const String & table_name_regexp_, const Context & context_) - : IStorage{columns_}, - name(name_), source_database(source_database_), - table_name_regexp(table_name_regexp_), global_context(context_) + : IStorage(columns_, ColumnsDescription({{"_table", std::make_shared<DataTypeString>()}}, true)) + , name(name_) + , source_database(source_database_) + , table_name_regexp(table_name_regexp_) + , global_context(context_) { } @@ -61,44 +63,29 @@ StorageMerge::StorageMerge( /// NOTE: structure of underlying tables as well as their set are not constant, /// so the results of these methods may become obsolete after the call. -bool StorageMerge::isVirtualColumn(const String & column_name) const -{ - if (column_name != "_table") - return false; - - return !IStorage::hasColumn(column_name); -} - NameAndTypePair StorageMerge::getColumn(const String & column_name) const { - if (IStorage::hasColumn(column_name)) - return IStorage::getColumn(column_name); + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->getColumn(column_name); + } - /// virtual column of the Merge table itself - if (column_name == "_table") - return { column_name, std::make_shared<DataTypeString>() }; - - /// virtual (and real) columns of the underlying tables - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->getColumn(column_name); - - throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + return IStorage::getColumn(column_name); } + bool StorageMerge::hasColumn(const String & column_name) const { - if (column_name == "_table") - return true; + if (!IStorage::hasColumn(column_name)) + { + auto first_table = getFirstTable([](auto &&) { return true; }); + if (first_table) + return first_table->hasColumn(column_name); + } - if (IStorage::hasColumn(column_name)) - return true; - - auto first_table = getFirstTable([](auto &&) { return true; }); - if (first_table) - return first_table->hasColumn(column_name); - - return false; + return true; } @@ -188,7 +175,7 @@ BlockInputStreams StorageMerge::read( for (const auto & column_name : column_names) { - if (isVirtualColumn(column_name)) + if (column_name == "_table" && isVirtualColumn(column_name)) has_table_virtual_column = true; else real_column_names.push_back(column_name);
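The StorageMerge change declares the `_table` virtual column up front (via the second `ColumnsDescription` constructor argument) instead of special-casing it inside `getColumn`/`hasColumn`. For reference, this is the user-visible behaviour being preserved; database and table names below are invented:

```sql
-- _table reports which underlying table each row of a Merge table came from.
CREATE TABLE test.all_hits AS test.hits_1 ENGINE = Merge(test, '^hits_');

SELECT _table, count()
FROM test.all_hits
GROUP BY _table;
```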
diff --git a/dbms/src/Storages/StorageMerge.h b/dbms/src/Storages/StorageMerge.h index b5ebf97ba9d..e51f89e93f8 100644 --- a/dbms/src/Storages/StorageMerge.h +++ b/dbms/src/Storages/StorageMerge.h @@ -26,6 +26,7 @@ public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } + /// Consider columns coming from the underlying tables NameAndTypePair getColumn(const String & column_name) const override; bool hasColumn(const String & column_name) const override; @@ -86,8 +87,6 @@ protected: void convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, BlockInputStreamPtr & source_stream, QueryProcessingStage::Enum processed_stage); - - bool isVirtualColumn(const String & column_name) const override; }; } diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 99591648bae..63ddfe15649 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -40,6 +40,7 @@ const char * auto_config_build[] "USE_MIMALLOC", "@USE_MIMALLOC@", "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", + "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", "USE_RE2_ST", "@USE_RE2_ST@", "USE_VECTORCLASS", "@USE_VECTORCLASS@", diff --git a/dbms/src/Storages/System/StorageSystemPartsBase.cpp b/dbms/src/Storages/System/StorageSystemPartsBase.cpp index 05ae6e086e2..303a8ddd939 100644 --- a/dbms/src/Storages/System/StorageSystemPartsBase.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsBase.cpp @@ -257,7 +257,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL auto add_alias = [&](const String & alias_name, const String & column_name) { - ColumnDescription column(alias_name, columns.get(column_name).type); + ColumnDescription column(alias_name, columns.get(column_name).type, false); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared<ASTIdentifier>(column_name); columns.add(column);
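The `add_alias` helper patched above builds an ALIAS column programmatically: a `ColumnDescription` whose default kind is `Alias` and whose default expression is an identifier naming another column. The same thing expressed in DDL, with an invented table:

```sql
CREATE TABLE test.alias_demo
(
    bytes UInt64,
    size UInt64 ALIAS bytes  -- reads resolve to the bytes column
) ENGINE = MergeTree()
ORDER BY tuple();

SELECT size FROM test.alias_demo;
```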
diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 3ac32ce0f5b..cbb1feef3af 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -23,71 +23,11 @@ namespace DB { -namespace VirtualColumnUtils +namespace { -String chooseSuffix(const NamesAndTypesList & columns, const String & name) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - if (it.name == name + current_suffix) - { - done = false; - break; - } - if (done) break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names) -{ - int id = 0; - String current_suffix; - while (true) - { - bool done = true; - for (const auto & it : columns) - { - for (size_t i = 0; i < names.size(); ++i) - { - if (it.name == names[i] + current_suffix) - { - done = false; - break; - } - } - if (!done) - break; - } - if (done) - break; - ++id; - current_suffix = toString(id); - } - return current_suffix; -} - -void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) -{ - auto & select = ast->as<ASTSelectQuery &>(); - if (!select.with()) - select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared<ASTExpressionList>()); - - auto literal = std::make_shared<ASTLiteral>(value); - literal->alias = column_name; - literal->prefer_alias_to_column_name = true; - select.with()->children.push_back(literal); -} - /// Verifying that the function depends only on the specified columns -static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) +bool isValidFunction(const ASTPtr & expression, const NameSet & columns) { for (size_t i = 0; i < expression->children.size(); ++i) if (!isValidFunction(expression->children[i], columns)) @@ -100,7 +40,7 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) } /// Extract all subfunctions of the main conjunction, but depending only on the specified columns -static void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector<ASTPtr> & result) +void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector<ASTPtr> & result) { const auto * function = expression->as<ASTFunction>(); if (function && function->name == "and") @@ -115,7 +55,7 @@ static void extractFunctions(const ASTPtr & expression, const NameSet & columns, } /// Construct a conjunction from given functions -static ASTPtr buildWhereExpression(const ASTs & functions) +ASTPtr buildWhereExpression(const ASTs & functions) { if (functions.size() == 0) return nullptr; @@ -130,6 +70,23 @@ static ASTPtr buildWhereExpression(const ASTs & functions) return new_query; } +} + +namespace VirtualColumnUtils +{ + +void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value) +{ + auto & select = ast->as<ASTSelectQuery &>(); + if (!select.with()) + select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared<ASTExpressionList>()); + + auto literal = std::make_shared<ASTLiteral>(value); + literal->alias = column_name; + literal->prefer_alias_to_column_name = true; + select.with()->children.push_back(literal); +} + void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context) { const auto & select = query->as<ASTSelectQuery &>();
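`rewriteEntityInAst`, now grouped with the other public helpers in the `VirtualColumnUtils` namespace, injects an aliased literal into the query's WITH section so that a virtual column value can be referenced by name. Conceptually, after `rewriteEntityInAst(ast, '_table', 'hits_1')` the query behaves like the following (the value and table are illustrative):

```sql
WITH 'hits_1' AS _table
SELECT _table, count()
FROM test.hits_1
GROUP BY _table;
```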
diff --git a/dbms/src/Storages/VirtualColumnUtils.h b/dbms/src/Storages/VirtualColumnUtils.h index a1e1db4f04c..4976deaa4c9 100644 --- a/dbms/src/Storages/VirtualColumnUtils.h +++ b/dbms/src/Storages/VirtualColumnUtils.h @@ -3,7 +3,7 @@ #include #include -#include +#include namespace DB @@ -16,13 +16,6 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Calculate the minimum numeric suffix to add to the string so that it is not present in the set -String chooseSuffix(const NamesAndTypesList & columns, const String & name); - -/// Calculate the minimum total numeric suffix to add to each string, -/// so that none is present in the set. -String chooseSuffixForSet(const NamesAndTypesList & columns, const std::vector<String> & names); - /// Adds to the select query section `select column_name as value` /// For example select _port as 9000. void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & value); @@ -33,14 +26,14 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); /// Extract from the input stream a set of `name` column values -template <typename T1> -std::multiset<T1> extractSingleValueFromBlock(const Block & block, const String & name) +template <typename T> +std::multiset<T> extractSingleValueFromBlock(const Block & block, const String & name) { - std::multiset<T1> res; + std::multiset<T> res; const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get<T1>()); + res.insert((*data.column)[i].get<T>()); return res; } diff --git a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 4a25bff5d87..bcee0b8d8e1 100644 --- a/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/dbms/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -54,22 +54,22 @@ void check(const std::string & query, const std::string & expected, const Contex TEST(TransformQueryForExternalDatabase, InWithSingleElement) { check("SELECT column FROM test.table WHERE 1 IN (1)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE 1 IN (1)", state().context, state().columns); check("SELECT column FROM test.table WHERE column IN (1, 2)", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" IN (1, 2)", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT IN ('hello', 'world')", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT IN ('hello', 'world')", state().context, state().columns); } TEST(TransformQueryForExternalDatabase, Like) { check("SELECT column FROM test.table WHERE column LIKE '%hello%'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" LIKE '%hello%'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" LIKE '%hello%'", state().context, state().columns); check("SELECT column FROM test.table WHERE column NOT LIKE 'w%rld'", - "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", + "SELECT \"column\" FROM \"test\".\"table\" WHERE \"column\" NOT LIKE 'w%rld'", state().context, state().columns); } diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 1b2d190b383..06819af7668 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -12,7 +12,7 @@ You must install latest Docker from https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/#set-up-the-repository Don't use Docker from your system repository. -* [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip` +* [pip](https://pypi.python.org/pypi/pip) and `libpq-dev`. To install: `sudo apt-get install python-pip libpq-dev` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries.
To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf pytest-timeout` diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 157ba616246..d8e4e9e506e 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -338,30 +338,32 @@ class ClickHouseCluster: self.docker_client = docker.from_env(version=self.docker_api_version) + common_opts = ['up', '-d', '--force-recreate'] + if self.with_zookeeper and self.base_zookeeper_cmd: - subprocess_check_call(self.base_zookeeper_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_zookeeper_cmd + common_opts) for command in self.pre_zookeeper_commands: self.run_kazoo_commands_with_retries(command, repeats=5) self.wait_zookeeper_to_start(120) if self.with_mysql and self.base_mysql_cmd: - subprocess_check_call(self.base_mysql_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mysql_cmd + common_opts) self.wait_mysql_to_start(120) if self.with_postgres and self.base_postgres_cmd: - subprocess_check_call(self.base_postgres_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_postgres_cmd + common_opts) self.wait_postgres_to_start(120) if self.with_kafka and self.base_kafka_cmd: - subprocess_check_call(self.base_kafka_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_kafka_cmd + common_opts + ['--renew-anon-volumes']) self.kafka_docker_id = self.get_instance_docker_id('kafka1') if self.with_hdfs and self.base_hdfs_cmd: - subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) if self.with_mongo and self.base_mongo_cmd: - subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) + subprocess_check_call(self.base_mongo_cmd + common_opts) self.wait_mongo_to_start(30) subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 3e562fbbcbb..3f38b068a22 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -22,7 +22,6 @@ import kafka_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. -# TODO: add test for mat. view is working. # TODO: add test for SELECT LIMIT is working. # TODO: modify tests to respect `skip_broken_messages` setting. @@ -86,8 +85,8 @@ def kafka_produce_protobuf_messages(topic, start_index, num_messages): # Since everything is async and shaky when receiving messages from Kafka, # we may want to try and check results multiple times in a loop. 
-def kafka_check_result(result, check=False): - fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference') +def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'): + fpath = p.join(p.dirname(__file__), ref_file) with open(fpath) as reference: if check: assert TSV(result) == TSV(reference) @@ -148,13 +147,12 @@ def test_kafka_settings_new_syntax(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'new', - kafka_group_name = 'new', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n', - kafka_skip_broken_messages = 1; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'new', + kafka_group_name = 'new', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n', + kafka_skip_broken_messages = 1; ''') messages = [] @@ -172,7 +170,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): kafka_produce('new', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -183,12 +181,11 @@ def test_kafka_csv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'csv', - kafka_group_name = 'csv', - kafka_format = 'CSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'csv', + kafka_group_name = 'csv', + kafka_format = 'CSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -197,7 +194,7 @@ def test_kafka_csv_with_delimiter(kafka_cluster): kafka_produce('csv', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -208,12 +205,11 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'tsv', - kafka_group_name = 'tsv', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tsv', + kafka_group_name = 'tsv', + kafka_format = 'TSV', + kafka_row_delimiter = '\\n'; ''') messages = [] @@ -222,7 +218,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_produce('tsv', messages) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -233,11 +229,10 @@ def test_kafka_json_without_delimiter(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', - kafka_format = 'JSONEachRow'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow'; ''') messages = '' @@ -251,7 +246,7 @@ def test_kafka_json_without_delimiter(kafka_cluster): kafka_produce('json', [messages]) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -262,12 +257,11 @@ def test_kafka_protobuf(kafka_cluster): instance.query(''' CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb', - kafka_group_name = 
'pb', - kafka_format = 'Protobuf', - kafka_schema = 'kafka.proto:KeyValuePair'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb', + kafka_group_name = 'pb', + kafka_format = 'Protobuf', + kafka_schema = 'kafka.proto:KeyValuePair'; ''') kafka_produce_protobuf_messages('pb', 0, 20) @@ -275,7 +269,7 @@ def test_kafka_protobuf(kafka_cluster): kafka_produce_protobuf_messages('pb', 21, 29) result = '' - for i in range(50): + while True: result += instance.query('SELECT * FROM test.kafka') if kafka_check_result(result): break @@ -288,12 +282,11 @@ def test_kafka_materialized_view(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n'; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'mv', + kafka_group_name = 'mv', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = MergeTree() ORDER BY key; @@ -304,9 +297,9 @@ def test_kafka_materialized_view(kafka_cluster): messages = [] for i in range(50): messages.append(json.dumps({'key': i, 'value': i})) - kafka_produce('json', messages) + kafka_produce('mv', messages) - for i in range(20): + while True: time.sleep(1) result = instance.query('SELECT * FROM test.view') if kafka_check_result(result): @@ -321,7 +314,7 @@ def test_kafka_materialized_view(kafka_cluster): @pytest.mark.skip(reason="Hungs") def test_kafka_flush_on_big_message(kafka_cluster): # Create batchs of messages of size ~100Kb - kafka_messages = 10000 + kafka_messages = 1000 batch_messages = 1000 messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)] kafka_produce('flush', messages) @@ -331,12 +324,11 @@ def test_kafka_flush_on_big_message(kafka_cluster): DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value String) ENGINE = Kafka - SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'flush', - kafka_group_name = 'flush', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 10; + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'flush', + kafka_group_name = 'flush', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 10; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree ORDER BY key; @@ -356,7 +348,7 @@ def test_kafka_flush_on_big_message(kafka_cluster): except kafka.errors.GroupCoordinatorNotAvailableError: continue - for _ in range(20): + while True: time.sleep(1) result = instance.query('SELECT count() FROM test.view') if int(result) == kafka_messages*batch_messages: @@ -365,6 +357,71 @@ def test_kafka_flush_on_big_message(kafka_cluster): assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) +def test_kafka_virtual_columns(kafka_cluster): + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt1', + kafka_group_name = 'virt1', + kafka_format = 'JSONEachRow'; + ''') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('virt1', [messages]) + + messages = '' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + kafka_produce('virt1', [messages]) + + result = '' + while True: + 
time.sleep(1) + result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka') + if kafka_check_result(result, False, 'test_kafka_virtual1.reference'): + break + kafka_check_result(result, True, 'test_kafka_virtual1.reference') + + +def test_kafka_virtual_columns_with_materialized_view(kafka_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'virt2', + kafka_group_name = 'virt2', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _key as kafka_key, _topic as topic, _offset as offset FROM test.kafka; + ''') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + kafka_produce('virt2', messages) + + while True: + time.sleep(1) + result = instance.query('SELECT kafka_key, key, topic, value, offset FROM test.view') + if kafka_check_result(result, False, 'test_kafka_virtual2.reference'): + break + kafka_check_result(result, True, 'test_kafka_virtual2.reference') + + instance.query(''' + DROP TABLE test.consumer; + DROP TABLE test.view; + ''') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference new file mode 100644 index 00000000000..5956210d25e --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual1.reference @@ -0,0 +1,50 @@ + 0 virt1 0 0 + 1 virt1 1 0 + 2 virt1 2 0 + 3 virt1 3 0 + 4 virt1 4 0 + 5 virt1 5 0 + 6 virt1 6 0 + 7 virt1 7 0 + 8 virt1 8 0 + 9 virt1 9 0 + 10 virt1 10 0 + 11 virt1 11 0 + 12 virt1 12 0 + 13 virt1 13 0 + 14 virt1 14 0 + 15 virt1 15 0 + 16 virt1 16 0 + 17 virt1 17 0 + 18 virt1 18 0 + 19 virt1 19 0 + 20 virt1 20 0 + 21 virt1 21 0 + 22 virt1 22 0 + 23 virt1 23 0 + 24 virt1 24 0 + 25 virt1 25 1 + 26 virt1 26 1 + 27 virt1 27 1 + 28 virt1 28 1 + 29 virt1 29 1 + 30 virt1 30 1 + 31 virt1 31 1 + 32 virt1 32 1 + 33 virt1 33 1 + 34 virt1 34 1 + 35 virt1 35 1 + 36 virt1 36 1 + 37 virt1 37 1 + 38 virt1 38 1 + 39 virt1 39 1 + 40 virt1 40 1 + 41 virt1 41 1 + 42 virt1 42 1 + 43 virt1 43 1 + 44 virt1 44 1 + 45 virt1 45 1 + 46 virt1 46 1 + 47 virt1 47 1 + 48 virt1 48 1 + 49 virt1 49 1 diff --git a/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference new file mode 100644 index 00000000000..50c2edbf802 --- /dev/null +++ b/dbms/tests/integration/test_storage_kafka/test_kafka_virtual2.reference @@ -0,0 +1,50 @@ + 0 virt2 0 0 + 1 virt2 1 1 + 2 virt2 2 2 + 3 virt2 3 3 + 4 virt2 4 4 + 5 virt2 5 5 + 6 virt2 6 6 + 7 virt2 7 7 + 8 virt2 8 8 + 9 virt2 9 9 + 10 virt2 10 10 + 11 virt2 11 11 + 12 virt2 12 12 + 13 virt2 13 13 + 14 virt2 14 14 + 15 virt2 15 15 + 16 virt2 16 16 + 17 virt2 17 17 + 18 virt2 18 18 + 19 virt2 19 19 + 20 virt2 20 20 + 21 virt2 21 21 + 22 virt2 22 22 + 23 virt2 23 23 + 24 virt2 24 24 + 25 virt2 25 25 + 26 virt2 26 26 + 27 virt2 27 27 + 28 virt2 28 28 + 29 virt2 29 29 + 30 virt2 30 30 + 31 virt2 31 31 + 32 virt2 32 32 + 33 virt2 33 33 + 34 virt2 34 34 + 35 virt2 35 35 + 36 virt2 36 36 + 37 
virt2 37 37 + 38 virt2 38 38 + 39 virt2 39 39 + 40 virt2 40 40 + 41 virt2 41 41 + 42 virt2 42 42 + 43 virt2 43 43 + 44 virt2 44 44 + 45 virt2 45 45 + 46 virt2 46 46 + 47 virt2 47 47 + 48 virt2 48 48 + 49 virt2 49 49 diff --git a/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml new file mode 100644 index 00000000000..9f55dcb829e --- /dev/null +++ b/dbms/tests/integration/test_system_queries/configs/config.d/query_log.xml @@ -0,0 +1,9 @@ +<yandex> + <query_log> + <database>system</database> + <table>query_log</table> + <partition_by>toYYYYMM(event_date)</partition_by> + <flush_interval_milliseconds>300</flush_interval_milliseconds> + </query_log> +</yandex>
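This config deliberately lowers the query_log flush interval so the new integration test below can race the background flush thread. The behaviour under test, as a standalone client session (the final count depends on how many queries were logged since the last truncate):

```sql
SET log_queries = 1;
SELECT 1 FORMAT Null;          -- produces query_log entries in memory
SET log_queries = 0;
SYSTEM FLUSH LOGS;             -- forces the buffer into system.query_log
SELECT count() FROM system.query_log;
TRUNCATE TABLE system.query_log;
```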
diff --git a/dbms/tests/integration/test_system_queries/test.py b/dbms/tests/integration/test_system_queries/test.py index a3899bab577..1761017362a 100644 --- a/dbms/tests/integration/test_system_queries/test.py +++ b/dbms/tests/integration/test_system_queries/test.py @@ -92,6 +92,23 @@ def test_RELOAD_CONFIG_AND_MACROS(started_cluster): instance.query("SYSTEM RELOAD CONFIG") assert TSV(instance.query("select * from system.macros")) == TSV("mac\tro\n") + +def test_SYSTEM_FLUSH_LOGS(started_cluster): + instance = cluster.instances['ch1'] + for i in range(4): + # Sleep to execute flushing from background thread at first query + # by expiration of flush_interval_millisecond and test probable race condition. + time.sleep(0.5) + result = instance.query(''' + SET log_queries = 1; + SELECT 1 FORMAT Null; + SET log_queries = 0; + SYSTEM FLUSH LOGS; + SELECT count() FROM system.query_log;''') + instance.query('TRUNCATE TABLE system.query_log') + assert TSV(result) == TSV('4') + + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): diff --git a/dbms/tests/performance/h3.xml b/dbms/tests/performance/h3.xml new file mode 100644 index 00000000000..7381f559a0f --- /dev/null +++ b/dbms/tests/performance/h3.xml @@ -0,0 +1,14 @@ + + once + + + + + 2000 + 10000 + + + + + SELECT count() FROM system.numbers WHERE NOT ignore(geoToH3(37.62 + rand(1) / 0x100000000, 55.75 + rand(2) / 0x100000000, 15)) + diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql index d3295f086e8..88facac19e1 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql @@ -24,11 +24,6 @@ DROP DATABASE IF EXISTS test_DatabaseMemory; CREATE DATABASE test_DatabaseMemory ENGINE = Memory; CREATE TABLE test_DatabaseMemory.A (A UInt8) ENGINE = Null; --- Just in case -DROP DATABASE IF EXISTS test_DatabaseDictionary; -CREATE DATABASE test_DatabaseDictionary ENGINE = Dictionary; - SELECT sum(ignore(*, metadata_modification_time, engine_full, create_table_query)) FROM system.tables; -DROP DATABASE test_DatabaseDictionary; -- { serverError 48 } DROP DATABASE test_DatabaseMemory; diff --git a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference index f13e9ddb1bd..1f1230a2104 100644 --- a/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/dbms/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena yandex yandex yandex яндекс yandex -canada hello hello hello hello hello canada canada +canada hello hello canada diff --git a/dbms/tests/queries/0_stateless/00398_url_functions.reference b/dbms/tests/queries/0_stateless/00398_url_functions.reference index e4a31f0654a..acb605597d3 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.reference +++ b/dbms/tests/queries/0_stateless/00398_url_functions.reference @@ -12,13 +12,17 @@ www.example.com 127.0.0.1 www.example.com www.example.com +www.example.com +example.com example.com example.com ====DOMAIN==== com ru -ru + +com +com com ====PATH==== П @@ -61,6 +65,8 @@ example.com example.com example.com example.com +example.com +example.com ====CUT WWW==== http://example.com http://example.com:1234 diff --git 
a/dbms/tests/queries/0_stateless/00398_url_functions.sql b/dbms/tests/queries/0_stateless/00398_url_functions.sql index 16425dae46d..d301cac5b15 100644 --- a/dbms/tests/queries/0_stateless/00398_url_functions.sql +++ b/dbms/tests/queries/0_stateless/00398_url_functions.sql @@ -13,6 +13,8 @@ SELECT domain('http://www.example.com?q=4') AS Host; SELECT domain('http://127.0.0.1:443/') AS Host; SELECT domain('//www.example.com') AS Host; SELECT domain('//paul@www.example.com') AS Host; +SELECT domain('www.example.com') as Host; +SELECT domain('example.com') as Host; SELECT domainWithoutWWW('//paul@www.example.com') AS Host; SELECT domainWithoutWWW('http://paul@www.example.com:80/') AS Host; @@ -23,6 +25,8 @@ SELECT topLevelDomain('http://127.0.0.1:443/') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru?q=hello%20world') AS Domain; SELECT topLevelDomain('svn+ssh://example.ru.?q=hello%20world') AS Domain; SELECT topLevelDomain('//www.example.com') AS Domain; +SELECT topLevelDomain('www.example.com') as Domain; +SELECT topLevelDomain('example.com') as Domain; SELECT '====PATH===='; SELECT decodeURLComponent('%D0%9F'); @@ -69,6 +73,8 @@ SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b'); SELECT cutToFirstSignificantSubdomain('http://www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('http://paul@www.example.com/a/b/c?a=b#d=f'); SELECT cutToFirstSignificantSubdomain('//paul@www.example.com/a/b/c?a=b#d=f'); +SELECT cutToFirstSignificantSubdomain('www.example.com'); +SELECT cutToFirstSignificantSubdomain('example.com'); SELECT '====CUT WWW===='; SELECT cutWWW('http://www.example.com'); diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index ee84060db57..c71e5c1cdd9 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -4,59 +4,59 @@ 1 2000-01-01 1 test string 1 1 -------Forbid push down------- -SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers \n LIMIT 1\n) AS t \nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) +SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers\n LIMIT 1\n) AS t\nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) 1 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n) \nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n) \nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- -SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n) \nWHERE value = \'1\' 
+SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n)\nWHERE value = \'1\' 1 -SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n) \nWHERE subquery = 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n)\nWHERE subquery = 1 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 \n HAVING a = 3\n) \nWHERE a = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING a = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597 \n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias \n HAVING b = 3\n) AS outer_table_alias \nWHERE b = 3 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING b = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS b \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS b\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id 
= 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) AS b \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n WHERE id = 1\n) AS b \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597 \n WHERE id = 1\n GROUP BY \n id, \n date\n) \nWHERE id = 1 +SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597\n WHERE id = 1\n GROUP BY \n id, \n date\n)\nWHERE id = 1 1 2000-01-01 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) USING (id)\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) USING (id)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS test_00597 USING (id)\nWHERE value = 1 +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS test_00597 USING (id)\nWHERE value = 1 1 2000-01-01 test string 1 1 -SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 +SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) \n 
ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) USING (id)\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) USING (id)\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (id)\nWHERE b.id = 1 +SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (id)\nWHERE b.id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597 \n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers\n LIMIT 1\n)\nANY LEFT JOIN \n(\n SELECT *\n FROM test_00597\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597 \n WHERE id = 1\n ) AS a \n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597 \n ) AS b ON id = b.id\n WHERE id = 1\n) \nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n ANY LEFT JOIN \n (\n SELECT *\n FROM test_00597\n ) AS b ON id = b.id\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 311c1ed53a4..13e0f35b075 100644 --- a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) +CREATE VIEW default.test_view_00599 (`id` UInt64) AS SELECT * FROM default.test_00599 WHERE id = (SELECT 1) diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference index 573541ac970..237dd6b5309 100644 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.reference +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.reference @@ -1 +1,5 @@ 0 +1 0 +3 0 +2 0 +44 diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 6778bbce149..ce0a4e185ad 100755 --- 
a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -9,3 +9,16 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d sleep 0.1 # First query (usually) should be received by the server after this sleep. $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 0' wait + +${CLICKHOUSE_CLIENT} --user=readonly --query_id=42 --query='SELECT 1, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 43' ||: ) 2>&1 | grep -F 'is already running by user' > /dev/null +wait + +${CLICKHOUSE_CLIENT} --query='SELECT 3, sleep(1)' & +sleep 0.1 +${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 2, sleep(1)' & +sleep 0.1 +( ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --queue_max_wait_ms=500 --query='SELECT 43' ||: ) 2>&1 | grep -F "can't be stopped" > /dev/null +${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' +wait diff --git a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index bb67ae9fa83..350c9b05ea8 100755 --- a/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/dbms/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT $settings -q "INSERT INTO merge_tree_table SELECT (intHash64( $CLICKHOUSE_CLIENT $settings -q "OPTIMIZE TABLE merge_tree_table FINAL;" -toching_many_parts_query="SELECT count() from (SELECT toDayOfWeek(date) as m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" +toching_many_parts_query="SELECT count() FROM (SELECT toDayOfWeek(date) AS m, id, count() FROM merge_tree_table GROUP BY id, m ORDER BY count() DESC LIMIT 10 SETTINGS max_threads = 1)" $CLICKHOUSE_CLIENT $settings -q "$toching_many_parts_query" &> /dev/null $CLICKHOUSE_CLIENT $settings -q "SYSTEM FLUSH LOGS" diff --git a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl index e0e510e72fc..dd12d9ffdfa 100755 --- a/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl +++ b/dbms/tests/queries/0_stateless/00746_sql_fuzzy.pl @@ -133,7 +133,7 @@ sub main { split /[\s;,]+/, $ENV{SQL_FUZZY_FUNCTIONS} || file_read($ENV{SQL_FUZZY_FILE_FUNCTIONS} || 'clickhouse-functions') - || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower CRC32 length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 
reinterpretAsUInt32 toStartOfFiveMinute toISOYear toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName 
dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' + || '__inner_restore_projection__ __inner_build_projection_composition__ convertCharset one_or_zero findClusterValue findClusterIndex toNullable coalesce isNotNull geoToH3 pointInEllipses transform pow acos asin tan cos tgamma lgamma erfc erf sqrt log10 exp10 e visitParamExtractFloat visitParamExtractUInt decodeURLComponent cutURLParameter cutQueryStringAndFragment cutFragment cutWWW URLPathHierarchy URLHierarchy extractURLParameterNames extractURLParameter queryStringAndFragment pathFull sin topLevelDomain domainWithoutWWW domain protocol greatCircleDistance extract match positionCaseInsensitiveUTF8 positionCaseInsensitive positionUTF8 position replaceRegexpAll replaceRegexpOne arrayStringConcat splitByString splitByChar alphaTokens endsWith startsWith appendTrailingCharIfAbsent substringUTF8 concatAssumeInjective reverseUTF8 upperUTF8 __inner_project__ upper lower CRC32 length notEmpty trunc round roundAge roundDuration roundToExp2 reinterpretAsString reinterpretAsDateTime reinterpretAsDate reinterpretAsFloat64 reinterpretAsFloat32 reinterpretAsInt64 reinterpretAsInt8 reinterpretAsUInt32 toStartOfFiveMinute toISOYear 
toISOWeek concat toDecimal64 ifNull toStartOfDay toSecond addSeconds sleepEachRow materialize visitParamExtractInt toStartOfMinute toDayOfWeek toDayOfMonth bitShiftLeft emptyArrayUInt8 parseDateTimeBestEffort toTime toDateTimeOrNull toFloat32OrNull toInt16 IPv6NumToString atan substring arrayIntersect isInfinite toRelativeHourNum hex arrayEnumerateDense toUInt8OrZero toRelativeSecondNum toUInt64OrNull MACNumToString toInt32OrNull toDayOfYear toUnixTimestamp toString toDateOrZero subtractDays toMinute murmurHash3_64 murmurHash2_32 toUInt64 toUInt8 dictGetDateTime empty isFinite caseWithoutExpression caseWithoutExpr visitParamExtractRaw queryString dictGetInt32OrDefault caseWithExpression toInt8OrZero multiIf if intExp10 bitShiftRight less toUInt8OrNull toInt8OrNull bitmaskToArray toIntervalYear toFloat64OrZero dateDiff generateUUIDv4 arrayPopBack toIntervalMonth toUUID notEquals toInt16OrNull murmurHash2_64 hasAny toIntervalMinute isNull tupleElement replaceAll parseDateTimeBestEffortOrZero toFloat32OrZero lowerUTF8 notIn gcd like regionToPopulation MACStringToOUI notLike toStringCutToZero lcm parseDateTimeBestEffortOrNull not toInt32OrZero arrayFilter toInt16OrZero range equals now toTypeName toUInt32OrNull emptyArrayString dictGetDateTimeOrDefault bitRotateRight cutIPv6 toUInt32OrZero timezone reverse runningDifferenceStartingWithFirstValue toDateTime arrayPopFront toInt32 intHash64 extractURLParameters lowCardinalityIndices toStartOfMonth toYear hasAll rowNumberInAllBlocks bitTestAll arrayCount arraySort abs bitNot intDiv intDivOrZero firstSignificantSubdomain dictGetFloat32OrDefault reinterpretAsUInt16 toHour minus regionToArea unhex IPv4StringToNum toIntervalHour toInt8 dictGetFloat32 log IPv4NumToString modulo arrayEnumerate cutQueryString reinterpretAsFixedString countEqual bitTest toDecimal128 plus or reinterpretAsUInt64 toMonth visitParamExtractBool emptyArrayUInt64 replaceOne arrayReverseSort toFloat32 toRelativeMonthNum emptyArrayInt32 toRelativeYearNum arrayElement log2 array arrayReverse toUInt64OrZero emptyArrayFloat64 negate arrayPushBack subtractWeeks bitTestAny bitAnd toDecimal32 arrayPushFront lessOrEquals intExp2 toUInt16OrZero arrayConcat arrayCumSum arraySlice addDays dictGetUInt8 toUInt32 bitOr caseWithExpr toStartOfYear toIntervalDay MD5 emptyArrayUInt32 emptyArrayInt8 toMonday addMonths arrayUniq SHA256 arrayExists multiply toUInt16OrNull dictGetInt8 visitParamHas emptyArrayInt64 toIntervalSecond toDate sleep emptyArrayToSingle path toInt64OrZero SHA1 extractAll emptyArrayDate dumpColumnStructure toInt64 lengthUTF8 greatest arrayEnumerateUniq arrayDistinct arrayFirst toFixedString IPv4NumToStringClassC toFloat64OrNull IPv4ToIPv6 identity ceil toStartOfQuarter dictGetInt8OrDefault MACStringToNum emptyArrayUInt16 UUIDStringToNum dictGetUInt16 toStartOfFifteenMinutes toStartOfHour sumburConsistentHash toStartOfISOYear toRelativeQuarterNum toRelativeWeekNum toRelativeDayNum cbrt yesterday bitXor timeSlot timeSlots emptyArrayInt16 dictGetInt16 toYYYYMM toYYYYMMDDhhmmss toUInt16 addMinutes addHours addWeeks nullIf subtractSeconds subtractMinutes toIntervalWeek subtractHours isNaN subtractMonths toDateOrNull subtractYears toTimeZone formatDateTime has cityHash64 intHash32 fragment regionToCity indexOf regionToDistrict regionToCountry visibleWidth regionToContinent regionToTopContinent toColumnTypeName regionHierarchy CHAR_LENGTH least divide SEHierarchy dictGetDate OSToRoot SEToRoot OSIn SEIn regionToName dictGetStringOrDefault OSHierarchy exp floor dictGetUInt8OrDefault 
dictHas dictGetUInt64 cutToFirstSignificantSubdomain dictGetInt32 pointInPolygon dictGetInt64 blockNumber IPv6StringToNum dictGetString dictGetFloat64 dictGetUUID CHARACTER_LENGTH toQuarter dictGetHierarchy toFloat64 arraySum toInt64OrNull dictIsIn dictGetUInt16OrDefault dictGetUInt32OrDefault emptyArrayDateTime greater jumpConsistentHash dictGetUInt64OrDefault dictGetInt16OrDefault dictGetInt64OrDefault reinterpretAsInt32 dictGetUInt32 murmurHash3_32 bar dictGetUUIDOrDefault rand modelEvaluate arrayReduce farmHash64 bitmaskToList formatReadableSize halfMD5 SHA224 arrayMap sipHash64 dictGetFloat64OrDefault sipHash128 metroHash64 murmurHash3_128 yandexConsistentHash emptyArrayFloat32 arrayAll toYYYYMMDD today arrayFirstIndex greaterOrEquals arrayDifference visitParamExtractString toDateTimeOrZero globalNotIn throwIf and xor currentDatabase hostName URLHash getSizeOfEnumType defaultValueOfArgumentType blockSize tuple arrayCumSumNonNegative rowNumberInBlock arrayResize ignore toRelativeMinuteNum indexHint reinterpretAsInt16 addYears arrayJoin replicate hasColumnInTable version regionIn uptime runningAccumulate runningDifference assumeNotNull pi finalizeAggregation toLowCardinality exp2 lowCardinalityKeys in globalIn dictGetDateOrDefault rand64 CAST bitRotateLeft randConstant UUIDNumToString reinterpretAsUInt8 truncate ceiling retention maxIntersections groupBitXor groupBitOr uniqUpTo uniqCombined uniqExact uniq covarPop stddevPop varPop covarSamp varSamp sumMap corrStable corr quantileTiming quantileDeterministic quantilesExact uniqHLL12 quantilesTiming covarPopStable stddevSampStable quantilesExactWeighted quantileExactWeighted quantileTimingWeighted quantileExact quantilesDeterministic quantiles topK sumWithOverflow count groupArray stddevSamp groupArrayInsertAt quantile quantilesTimingWeighted quantileTDigest quantilesTDigest windowFunnel min argMax varSampStable maxIntersectionsPosition quantilesTDigestWeighted groupUniqArray sequenceCount sumKahan any anyHeavy histogram quantileTDigestWeighted max groupBitAnd argMin varPopStable avg sequenceMatch stddevPopStable sum anyLast covarSampStable BIT_XOR medianExactWeighted medianTiming medianExact median medianDeterministic VAR_SAMP STDDEV_POP medianTDigest VAR_POP medianTDigestWeighted BIT_OR STDDEV_SAMP medianTimingWeighted COVAR_SAMP COVAR_POP BIT_AND' ]; # $functions = [grep { not $_ ~~ [qw( )] } @$functions]; # will be removed # select name from system.table_functions format TSV; diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 35217410c2d..e45dde1921e 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751 )) AND (platform = (SELECT (SELECT min(platform) FROM test.v_00751 ))) +CREATE MATERIALIZED VIEW test.t_mv_00751 (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t_00751 WHERE (app = (SELECT min(app) FROM test.u_00751)) AND (platform = (SELECT (SELECT min(platform) 
FROM test.v_00751))) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a diff --git a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 04c21a1e29a..24649ea3acb 100644 --- a/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -56,26 +56,26 @@ comma nullable 1 1 1 1 2 2 1 2 cross -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \n, \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.a -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.a +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable vs not nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = t2_00826.b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = t2_00826.b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b cross self -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nCROSS JOIN t1_00826 AS y \nWHERE (a = y.a) AND (b = y.b) -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x \nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nCROSS JOIN t1_00826 AS y\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) cross one table expr -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE a = b cross multiple ands -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n 
SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) cross and inside and -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 \nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n) AS t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 \nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826 \n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00826\n WHERE b > 0\n) AS t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) diff --git a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference index 6a3ccd22249..e1256053739 100644 --- a/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/dbms/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,17 +1,17 @@ -SELECT a\nFROM t1_00849 \nCROSS JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 -SELECT a\nFROM t1_00849 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a -SELECT a\nFROM t1_00849 \nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849 \n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM 
t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM 
t3_00849 \n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = `--t1_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n ) \n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n) \nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849 \n CROSS JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 \n ) \n CROSS JOIN \n (\n SELECT *\n FROM t3_00849 \n ) AS t3_00849 \n) \nCROSS JOIN \n(\n SELECT *\n FROM t4_00849 \n) AS t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849 \n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849 \n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) \nCROSS JOIN \n(\n SELECT *\n FROM t3_00849 \n) AS t3_00849 +SELECT a\nFROM t1_00849\nCROSS JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a +SELECT a\nFROM t1_00849\nALL INNER JOIN \n(\n SELECT *\n FROM t2_00849\n) AS t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n 
WHERE `--t1_00849.a` = `--t2_00849.a`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n WHERE (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n WHERE `--t1_00849.b` = `--t2_00849.b`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n WHERE (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = `--t2_00849.b`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n WHERE `--t2_00849.a` = `--t1_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n WHERE (`--t3_00849.a` = `--t2_00849.a`) AND 
(`--t3_00849.a` = `--t1_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n WHERE `--t1_00849.a` = `--t2_00849.a`\n )\n ALL INNER JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n WHERE (`--t2_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = `--t2_00849.a`)\n)\nALL INNER JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849\n )\n CROSS JOIN \n (\n SELECT *\n FROM t3_00849\n ) AS t3_00849\n)\nCROSS JOIN \n(\n SELECT *\n FROM t4_00849\n) AS t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN \n (\n SELECT *\n FROM t2_00849\n ) AS t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n)\nCROSS JOIN \n(\n SELECT *\n FROM t3_00849\n) AS t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N diff --git a/dbms/tests/queries/0_stateless/00908_analyze_query.reference b/dbms/tests/queries/0_stateless/00908_analyze_query.reference index a10c36ca4dd..a8619cfcd4b 100644 --- a/dbms/tests/queries/0_stateless/00908_analyze_query.reference +++ b/dbms/tests/queries/0_stateless/00908_analyze_query.reference @@ -1 +1 @@ -SELECT \n a, \n b\nFROM a +SELECT \n a, \n b\nFROM a diff --git a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh index d8a4f29b30f..67454f676b3 100755 --- a/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh +++ b/dbms/tests/queries/0_stateless/00909_kill_not_initialized_query.sh @@ -11,7 +11,7 @@ 
$CLICKHOUSE_CLIENT -q "CREATE TABLE cannot_kill_query (x UInt64) ENGINE = MergeT $CLICKHOUSE_CLIENT -q "INSERT INTO cannot_kill_query SELECT * FROM numbers(10000000)" &> /dev/null # This SELECT query will run for a long time. It's used as bloker for ALTER query. It will be killed with SYNC kill. -query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1, max_block_size=1" +query_for_pending="SELECT count() FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1, max_block_size = 1" $CLICKHOUSE_CLIENT -q "$query_for_pending" &>/dev/null & sleep 1 # queries should be in strict order @@ -23,7 +23,7 @@ sleep 1 # This SELECT query will also run for a long time. Also it's blocked by ALTER query. It will be killed with ASYNC kill. # This is main idea which we check -- blocked queries can be killed with ASYNC kill. -query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads=1" +query_to_kill="SELECT sum(1) FROM cannot_kill_query WHERE NOT ignore(sleep(1)) SETTINGS max_threads = 1" $CLICKHOUSE_CLIENT -q "$query_to_kill" &>/dev/null & sleep 1 # just to be sure that kill of $query_to_kill will be executed after $query_to_kill. diff --git a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference index 85322d0b541..f96ac067218 100644 --- a/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference +++ b/dbms/tests/queries/0_stateless/00910_client_window_size_detection.reference @@ -1 +1 @@ -79 +105 diff --git a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference index a0313be86ff..30d14bf1e41 100644 --- a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers -CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers +CREATE VIEW default.t (`number` UInt64) AS SELECT number FROM system.numbers +CREATE VIEW default.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference new file mode 100644 index 00000000000..ad594f0e81f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.reference @@ -0,0 +1,20 @@ +644325529094369568 +639821928864584823 +644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +644325528491955313 +644325528491955313 +644325528491955313 +644325528627451570 +644325529094369568 +55.720762 37.598135 644325528491955313 +55.720762 37.598135 644325528491955313 +55.72076201 37.598135 644325528491955313 +55.763241 37.660183 644325528627451570 +55.77922738 37.63098076 644325529094369568 +639821928864584823 1 +644325528491955313 2 +644325528627451570 1 +644325529094369568 1 diff --git a/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql new file mode 100644 index 00000000000..d3ce898c56a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; + +CREATE TABLE table1 (lat Float64, lon Float64, resolution UInt8) ENGINE = Memory; + +INSERT INTO table1 
VALUES(55.77922738, 37.63098076, 15); +INSERT INTO table1 VALUES(55.76324100, 37.66018300, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); +INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); + +select geoToH3(37.63098076, 55.77922738, 15); +select geoToH3(lon, lat, resolution) from table1 order by lat, lon, resolution; +select geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select lat, lon, geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select geoToH3(lon, lat, resolution) AS k, count(*) from table1 group by k order by k; + +DROP TABLE table1 diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference index f1377e3d220..09e5d7d1f02 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_simple.reference +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.reference @@ -1,5 +1,6 @@ 0 0 0 0 +5 6 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 2000-10-10 00:00:00 0 diff --git a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql index 62b320cc0b0..11f0055a377 100644 --- a/dbms/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/dbms/tests/queries/0_stateless/00933_ttl_simple.sql @@ -9,6 +9,17 @@ select a, b from ttl_00933_1; drop table if exists ttl_00933_1; +create table ttl_00933_1 (d DateTime, a Int, b Int) engine = MergeTree order by toDate(d) partition by tuple() ttl d + interval 1 second; +insert into ttl_00933_1 values (now(), 1, 2); +insert into ttl_00933_1 values (now(), 3, 4); +insert into ttl_00933_1 values (now() + 1000, 5, 6); +optimize table ttl_00933_1 final; -- check ttl merge for a part with both expired and unexpired values +select sleep(1.1) format Null; -- wait in case a very fast merge happens +optimize table ttl_00933_1 final; +select a, b from ttl_00933_1; + +drop table if exists ttl_00933_1; + create table ttl_00933_1 (d DateTime, a Int ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 1); insert into ttl_00933_1 values (toDateTime('2000-10-10 00:00:00'), 2); diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh new file mode 100755 index 00000000000..52246b50b7a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00944_create_bloom_filter_index_with_merge_tree.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \ +rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'` +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.bloom_filter_idx"; +$CLICKHOUSE_CLIENT --allow_experimental_data_skipping_indices=1 --query="CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192" +done diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference new file mode 100755 index 00000000000..7b6d919d404 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -0,0 +1,30 @@ +1 +0 +1 +1 +2 +0 +2 +2 +2 +0 +2 +2 +2 +0 +2 +2 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql new file mode 100755 index 00000000000..bb258b886a4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -0,0 +1,50 @@ +SET allow_experimental_data_skipping_indices = 1; + +DROP TABLE IF EXISTS test.single_column_bloom_filter; + +CREATE TABLE test.single_column_bloom_filter (u64 UInt64, i32 Int32, i64 UInt64, INDEX idx (i32) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 6; + +INSERT INTO test.single_column_bloom_filter SELECT number AS u64, number AS i32, number AS i64 FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) = (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) = (1, 1) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) = (1, (1, 1)) SETTINGS max_rows_to_read = 6; + +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (1, 2) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN ((1, 2), (2, 3)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN ((1, 1), (2, 2)) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN ((1, (1, 1)), (2, (2, 2))) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN (SELECT arrayJoin([toInt32(1), toInt32(2)])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN (SELECT arrayJoin([(toInt32(1), toInt32(2)), (toInt32(2), toInt32(3))])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN (SELECT arrayJoin([(toInt32(1), toUInt64(1)), (toInt32(2), toUInt64(2))])) SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN (SELECT arrayJoin([(toUInt64(1), (toUInt64(1), toInt32(1))), (toUInt64(2), (toUInt64(2), toInt32(2)))])) SETTINGS max_rows_to_read = 6; +WITH (1, 2) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE i32 IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 2), (2, 3)) AS 
liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i32) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6; +WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM test.single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6; + +DROP TABLE IF EXISTS test.single_column_bloom_filter; + + +DROP TABLE IF EXISTS test.bloom_filter_types_test; + +CREATE TABLE test.bloom_filter_types_test (order_key UInt64, i8 Int8, i16 Int16, i32 Int32, i64 Int64, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, f32 Float32, f64 Float64, date Date, date_time DateTime('Europe/Moscow'), str String, fixed_string FixedString(5), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_types_test SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; + +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; + +DROP TABLE IF EXISTS test.bloom_filter_types_test; diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference b/dbms/tests/queries/0_stateless/00951_ngram_search.reference similarity index 68% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.reference rename to dbms/tests/queries/0_stateless/00951_ngram_search.reference index d6d97eaaab9..1b845b6015d 100644 --- a/dbms/tests/queries/0_stateless/00951_ngram_entry.reference +++ b/dbms/tests/queries/0_stateless/00951_ngram_search.reference @@ -1,13 +1,8 @@ -0 -0 
-0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -18,98 +13,202 @@ 0 0 0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 500 500 500 500 500 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 1000 0 -0 +1000 +1000 500 -1000 +0 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... 
Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -117,10 +216,232 @@ привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +500 +500 +500 +500 +500 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +0 +571 +1000 +500 +0 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +привет 1000 +пап привет как дела - Яндекс.Видео 1000 +привет братан как дела - Яндекс.Видео 1000 +http://metric.ru/ 1000 +http://autometric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metris.ru/ 1000 +http://metrika.ru/ 1000 + 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 +привет 308 +привет братан как дела - Яндекс.Видео 923 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет 308 +привет как дела?... Херсон 769 +привет как дела клип - Яндекс.Видео 769 +привет братан как дела - Яндекс.Видео 769 +пап привет как дела - Яндекс.Видео 846 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 600 +http://autometric.ru/ 600 +http://metrica.yandex.com/ 600 +http://metris.ru/ 600 +http://metrika.ru/ 800 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 600 +http://metrika.ru/ 600 +http://metric.ru/ 800 +http://autometric.ru/ 800 +http://metrica.yandex.com/ 800 привет как дела?... Херсон 0 привет как дела клип - Яндекс.Видео 0 привет 0 @@ -128,108 +449,46 @@ http://metrika.ru/ 0 привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +http://metrica.yandex.com/ 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 +привет 121 +привет как дела?... Херсон 394 +привет братан как дела - Яндекс.Видео 788 +пап привет как дела - Яндекс.Видео 818 привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 1000 1000 1000 @@ -240,616 +499,357 @@ http://metrika.ru/ 1000 0 0 0 -0 -0 -0 -0 -0 -500 -500 -500 -500 -500 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 1000 1000 1000 1000 1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 1000 -429 -0 -500 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +привет 360 +привет братан как дела - Яндекс.Видео 960 +привет как дела?... Херсон 1000 +привет как дела клип - Яндекс.Видео 1000 +пап привет как дела - Яндекс.Видео 1000 http://metric.ru/ 0 http://autometric.ru/ 0 http://metrica.yandex.com/ 0 http://metris.ru/ 0 http://metrika.ru/ 0 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 - 0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 77 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 154 -привет как дела?... Херсон 231 -привет как дела клип - Яндекс.Видео 231 -привет братан как дела - Яндекс.Видео 231 -привет 692 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 200 -http://metric.ru/ 400 -http://autometric.ru/ 400 -http://metrica.yandex.com/ 400 -http://metris.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 200 -http://autometric.ru/ 200 -http://metrica.yandex.com/ 200 -http://metris.ru/ 400 -http://metrika.ru/ 400 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 182 -привет братан как дела - Яндекс.Видео 212 -привет как дела?... Херсон 606 -привет 879 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 40 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -пап привет как дела - Яндекс.Видео 80 -привет как дела?... Херсон 120 -привет как дела клип - Яндекс.Видео 120 -привет братан как дела - Яндекс.Видео 120 -привет 640 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 -http://metris.ru/ 500 -http://metrika.ru/ 500 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -1000 -1000 -1000 -1000 -1000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -0 -0 -0 -0 -0 -0 -0 +привет 360 привет как дела?... Херсон 880 привет как дела клип - Яндекс.Видео 880 -пап привет как дела - Яндекс.Видео 880 -привет братан как дела - Яндекс.Видео 920 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела?... Херсон 560 -привет как дела клип - Яндекс.Видео 560 -пап привет как дела - Яндекс.Видео 560 -привет братан как дела - Яндекс.Видео 560 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -http://metrika.ru/ 0 +привет братан как дела - Яндекс.Видео 880 +пап привет как дела - Яндекс.Видео 920 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 0 -http://metric.ru/ 500 -http://autometric.ru/ 500 -http://metrica.yandex.com/ 500 -http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrica.yandex.com/ 0 -http://metric.ru/ 250 -http://autometric.ru/ 250 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metrika.ru/ 250 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metric.ru/ 500 http://autometric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 -http://metric.ru/ 250 -http://autometric.ru/ 250 -http://metrica.yandex.com/ 250 +http://metrika.ru/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 http://metris.ru/ 500 http://metrika.ru/ 500 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 - 1000 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1000 +1000 +1000 +1000 +1000 +1000 +1000 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 http://metrica.yandex.com/ 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет братан как дела - Яндекс.Видео 80 +привет как дела?... Херсон 120 +привет как дела клип - Яндекс.Видео 120 +пап привет как дела - Яндекс.Видео 120 +привет 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +привет как дела?... Херсон 440 +привет как дела клип - Яндекс.Видео 440 +пап привет как дела - Яндекс.Видео 440 +привет братан как дела - Яндекс.Видео 440 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 http://metrika.ru/ 1000 - 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 1000 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metric.ru/ 500 +http://autometric.ru/ 500 +http://metrica.yandex.com/ 500 +http://metris.ru/ 500 +http://metrika.ru/ 750 +привет как дела?... Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 + 0 +http://metris.ru/ 500 +http://metrika.ru/ 500 +http://metric.ru/ 750 +http://autometric.ru/ 750 +http://metrica.yandex.com/ 750 +привет как дела?... 
Херсон 0 +привет как дела клип - Яндекс.Видео 0 +привет 0 +пап привет как дела - Яндекс.Видео 0 +привет братан как дела - Яндекс.Видео 0 +http://metric.ru/ 0 +http://autometric.ru/ 0 +http://metris.ru/ 0 +http://metrika.ru/ 0 + 0 +http://metrica.yandex.com/ 1000 diff --git a/dbms/tests/queries/0_stateless/00951_ngram_entry.sql b/dbms/tests/queries/0_stateless/00951_ngram_search.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00951_ngram_entry.sql rename to dbms/tests/queries/0_stateless/00951_ngram_search.sql diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference new file mode 100644 index 00000000000..f25c522a3c5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.reference @@ -0,0 +1,11 @@ +1 Hello, world 2005-05-05 05:05:05 +1 Hello, world 2005-05-05 05:05:05 +2 test 2005-05-25 15:00:00 +2 test 2005-05-25 15:00:00 +Code: 456. +abc +abc +Hello, world +Hello, world +0 +0 diff --git a/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh new file mode 100755 index 00000000000..c90dc92a7ef --- /dev/null +++ b/dbms/tests/queries/0_stateless/00954_client_prepared_statements.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps (i UInt8, s String, d DateTime) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05 05:05:05')"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES (2, 'test', '2005-05-25 15:00:00')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=1 \ + -q "SELECT * FROM ps WHERE i = {id:UInt8}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_phrase='Hello, world' \ + -q "SELECT * FROM ps WHERE s = {phrase:String}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_date='2005-05-25 15:00:00' \ + -q "SELECT * FROM ps WHERE d = {date:DateTime}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_id=2 --param_phrase='test' \ + -q "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String}"; + +$CLICKHOUSE_CLIENT -q "SELECT {s:String}" 2>&1 | grep -oP '^Code: 456\.' + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; + + +$CLICKHOUSE_CLIENT --param_test abc --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test=abc --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test 'Hello, world' --query 'SELECT {test:String}' +$CLICKHOUSE_CLIENT --param_test='Hello, world' --query 'SELECT {test:String}' + +$CLICKHOUSE_CLIENT --param_test '' --query 'SELECT length({test:String})' +$CLICKHOUSE_CLIENT --param_test='' --query 'SELECT length({test:String})' diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference new file mode 100644 index 00000000000..701cc5f8781 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.reference @@ -0,0 +1,6 @@ +(1,'Hello') +(1,('dt',2)) +[10,10,10] +[[10],[10],[10]] +[10,10,10] [[10],[10],[10]] (10,'Test') (10,('dt',10)) 2015-02-15 +Code: 457. 
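The `00954` script above exercises the new client-side prepared statements end to end. As a minimal standalone sketch of the same interface (a hedged example, assuming a locally running server reachable by a plain `clickhouse-client`; the table and parameter names are illustrative only):

```bash
#!/usr/bin/env bash
# Values are passed out-of-band as --param_<name> and referenced in the
# query text as {<name>:<Type>}; the server parses each value according
# to the declared type instead of splicing raw text into the SQL.
clickhouse-client --param_id=1 \
    -q "SELECT * FROM ps WHERE i = {id:UInt8}"

# Both forms are accepted: "--param_x value" and "--param_x=value".
clickhouse-client --param_phrase 'Hello, world' -q "SELECT {phrase:String}"

# Referencing a parameter that was never supplied fails with Code: 456,
# which is exactly what the test's grep asserts.
clickhouse-client -q "SELECT {missing:String}"
```

The `00955` test that follows pushes the same mechanism through composite types (arrays, tuples, a `Nullable(Date)`) and checks that a value like `[1] OR 1` is rejected with `Code: 457` instead of being interpreted as SQL, which is the injection-safety property the feature depends on.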
diff --git a/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh new file mode 100755 index 00000000000..fd30921b1ac --- /dev/null +++ b/dbms/tests/queries/0_stateless/00955_complex_prepared_statements.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +EXCEPTION_TEXT="Code: 457." + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ps"; +$CLICKHOUSE_CLIENT -q "CREATE TABLE ps ( + a Array(UInt32), da Array(Array(UInt8)), + t Tuple(Int16, String), dt Tuple(UInt8, Tuple(String, UInt8)), + n Nullable(Date) + ) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [1, 2], [[1, 1], [2, 2]], + (1, 'Hello'), (1, ('dt', 2)), + NULL)"; +$CLICKHOUSE_CLIENT -q "INSERT INTO ps VALUES ( + [10, 10, 10], [[10], [10], [10]], + (10, 'Test'), (10, ('dt', 10)), + '2015-02-15')"; + +$CLICKHOUSE_CLIENT --max_threads=1 --param_aui="[1, 2]" \ + -q "SELECT t FROM ps WHERE a = {aui:Array(UInt16)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_a="[[1, 1], [2, 2]]" \ + -q "SELECT dt FROM ps WHERE da = {d_a:Array(Array(UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_tisd="(10, 'Test')" \ + -q "SELECT a FROM ps WHERE t = {tisd:Tuple(Int16, String)}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_d_t="(10, ('dt', 10))" \ + -q "SELECT da FROM ps WHERE dt = {d_t:Tuple(UInt8, Tuple(String, UInt8))}"; +$CLICKHOUSE_CLIENT --max_threads=1 --param_nd="2015-02-15" \ + -q "SELECT * FROM ps WHERE n = {nd:Nullable(Date)}"; + +# Must throw an exception to avoid SQL injection +$CLICKHOUSE_CLIENT --max_threads=1 --param_injection="[1] OR 1" \ + -q "SELECT * FROM ps WHERE a = {injection:Array(UInt32)}" 2>&1 \ + | grep -o "$EXCEPTION_TEXT" + +$CLICKHOUSE_CLIENT -q "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference new file mode 100644 index 00000000000..28323dae39b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.reference @@ -0,0 +1,4 @@ +1 Hello, world 2005-05-05 +1 Hello, world 2005-05-05 +2 test 2019-05-25 +2 test 2019-05-25 diff --git a/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh new file mode 100755 index 00000000000..e022ff65fc2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00956_http_prepared_statements.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE IF EXISTS ps"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "CREATE TABLE ps (i UInt8, s String, d Date) ENGINE = Memory"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (1, 'Hello, world', '2005-05-05')"; +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "INSERT INTO ps VALUES (2, 'test', '2019-05-25')"; + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=1" \ + -d "SELECT * FROM ps WHERE i = {id:UInt8} ORDER BY i, s, d"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_phrase=Hello,+world" \ + -d "SELECT * FROM ps WHERE s = {phrase:String} ORDER BY i, s, d"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_date=2019-05-25" \ + -d "SELECT * FROM ps WHERE d = {date:Date} ORDER BY i, s, d"; +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?param_id=2&param_phrase=test" \ + -d "SELECT * FROM ps WHERE i = {id:UInt8} and s = {phrase:String} ORDER BY i, s, d"; + +${CLICKHOUSE_CURL} -sS $CLICKHOUSE_URL -d "DROP TABLE ps"; diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference new file mode 100644 index 00000000000..d3f7a9aa18b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -0,0 +1,7 @@ +SELECT + 1 AS x, + x.y +FROM +( + SELECT 'Hello, world' AS y +) AS x diff --git a/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh new file mode 100755 index 00000000000..7268a1e1a93 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00957_format_with_clashed_aliases.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT 1 AS x, x.y FROM (SELECT 'Hello, world' AS y) AS x" | $format diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference new file mode 100644 index 00000000000..7265311960f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference @@ -0,0 +1,9 @@ +SELECT + (x.1)[1], + (((x[1]).1)[1]).1, + (NOT x)[1], + -(x[1]), + (-x)[1], + (NOT x).1, + -(x.1), + (-x).1 diff --git a/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh new file mode 100755 index 00000000000..47f8e99bbb8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00958_format_of_tuple_array_element.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT" + +echo "SELECT (x.1)[1], (x[1].1)[1].1, (NOT x)[1], -x[1], (-x)[1], (NOT x).1, -x.1, (-x).1" | $format diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference new file mode 100644 index 00000000000..8feb70c2fc4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.reference @@ -0,0 +1,3 @@ +SELECT a + b AS x, x +SELECT a + b AS x, a + c AS x +SELECT a + b AS x, x diff --git a/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh new file mode 100755 index 00000000000..cad1083ad60 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00959_format_with_different_aliases.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +set -e + +format="$CLICKHOUSE_FORMAT --oneline" + +echo "SELECT a + b AS x, a + b AS x" | $format +echo "SELECT a + b AS x, a + c AS x" | $format +echo "SELECT a + b AS x, x" | $format diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.reference @@ -0,0 +1 @@ +0 diff --git a/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql new file mode 100644 index 00000000000..401c83af917 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00960_eval_ml_method_const.sql @@ -0,0 +1 @@ +WITH (SELECT stochasticLinearRegressionState(1, 2, 3)) AS model SELECT evalMLMethod(model, toFloat64(1), toFloat64(1)); diff --git a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference index d9ca7e3be21..3bedecd267b 100644 --- a/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference +++ b/dbms/tests/queries/1_stateful/00037_uniq_state_merge1.reference @@ -1,24 +1,15 @@ -yandex.ru 25107 25107 - 21999 21999 -public_search 16749 16749 + 89348 89348 +yandex.ru 25105 25105 avito.ru 16523 16523 -public 15429 15429 -mail.yandex.ru 13663 13663 -yandsearch 10039 10039 -news 8827 8827 +mail.yandex.ru 13659 13659 mail.ru 7643 7643 -doc 7537 7537 auto.ru 7350 7350 hurpass.com 6395 6395 best.ru 5477 5477 tv.yandex.ru 5341 5341 korer.ru 4967 4967 -mail.yandsearch 4246 4246 -cars 4077 4077 -publ 3970 3970 -yandex 3845 3845 -main=hurriyet.com 3806 3806 -yandex.ua 3803 3803 +mail.yandsearch 4237 4237 +yandex.ua 3802 3802 korablitz.ru 3717 3717 uyelik.hurriyet.com 3584 3584 e.mail.ru 3508 3508 @@ -28,46 +19,32 @@ coccoc.com 2707 2707 rutube.ru 2699 2699 rbc.ru 2644 2644 mamba.ru 2598 2598 -video 2558 2558 -mail.yandex 2447 2447 -wot 2253 2253 +mail.yandex 2441 2441 pikabu.ru 2130 2130 yandex.php 2057 2057 e.mail.yandex.ru 1971 1971 brandex.ru 1969 1969 -bravoslava-230v 1942 1942 -search 1933 1933 market.ru 1913 1913 mynet.ru 1881 1881 -mail 1845 1845 -mail.yandex.ua 1825 1825 +mail.yandex.ua 1823 1823 rutube.com 1821 1821 -images 1812 1812 news.rambler.com 1787 1787 hurpass.com.tr 1763 1763 ads.search 1742 1742 -marina_2_sezon 1680 1680 cars.auto.ru 1628 1628 cian.ru 1620 1620 ivi.ru 1617 1617 av.by 1598 1598 -world 1596 1596 news.yandex.ru 1495 1495 vk.com 1474 1474 
-pub 1469 1469 -forum 1414 1414 wow-girls.ru 1399 1399 -kinogo-dhpWXEdIcgoxWUZ6fgdTWw.. 1338 1338 uyelik.hurriyet.com.tr 1330 1330 aukro.ua 1314 1314 -plugins 1244 1244 images.yandsearch 1235 1235 ondom.ru 1221 1221 korablitz.com 1189 1189 -videovol-9-sezon 1187 1187 kerl.org 1155 1155 mail.yandex.php 1148 1148 -file 1147 1147 love.mail.yandex.ru 1136 1136 yandex.kz 1124 1124 coccoc.com.tr 1113 1113 @@ -77,24 +54,47 @@ sprashivai.ru 1072 1072 market.yandex.ru 1064 1064 spb-n.ru 1056 1056 sz.spaces.ru 1055 1055 -xofx.net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 marinance.ua 1050 1050 tube.ru 1044 1044 haber.com 1043 1043 -image&img_url=http 1042 1042 -sport 1040 1040 megogo.net 993 993 sozcu.com 991 991 yandex.by 938 938 -image&uinfo 936 936 -fast-golove.mail.ru_Mobile=0&at=35&text=производств 927 927 -linka 901 901 gazeta.ru 892 892 -yandex.ru;yandex.ru 892 892 -kinogo-dhpWXEdIcgoxWUZ6fgdTXA.. 890 890 fotki.yandex.ru 875 875 fast-golove.mail.yandex.php 842 842 -news=previews 839 839 -faber 833 833 lenta.ru 820 820 publicdaroglundai_anketa.ru 813 813 +mail.yandex.kz 810 810 +censor.net 807 807 +mail.yandex.by 804 804 +nnn.ru 796 796 +maxi.su 788 788 +rambler.ru 755 755 +hurpass.com.ua 729 729 +g1.botva.lv 728 728 +m.sport.airway 724 724 +tvizle.com 723 723 +fast-golove.mail.yandex.ru 712 712 +spb.ru 693 693 +eksisozluk.com 689 689 +uyelik.hurriyet 666 666 +rst.ua 650 650 +deko.ru 647 647 +my.mail.yandex.ru 647 647 +astrov.pro 625 625 +yandsearch.php 624 624 +kinogo.net 617 617 +fanati-avtomobile.jsp 611 611 +tv.yandsearch 605 605 +soft.ru 603 603 +pluginplus.ru 601 601 +images.yandex 595 595 +1tv.rbc.ru 592 592 +ria.ru 591 591 +marina_prezideniz.hurriyet.com 578 578 +youtube.ru 575 575 +cars.autochno.ru 570 570 +a2.stars.auto.yandsearch 566 566 +love.mail.ru 560 560 +mail.rambler.ru 553 553 diff --git a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference index 926cb1911ba..9144afd90b2 100644 --- a/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference +++ b/dbms/tests/queries/1_stateful/00038_uniq_state_merge2.reference @@ -1,100 +1,100 @@ - 582035 80248 -ru 299420 71339 -com 78253 34500 -html 40288 19569 -ua 33160 18847 -tr 19570 13117 -net 19003 12908 -php 17817 12011 -yandsearch 13598 10329 -by 9349 7695 -yandex 8946 7282 -org 5897 5320 -tv 5371 4660 -kz 5175 4588 -aspx 3084 2800 -phtml 3012 2725 -xml 2993 2726 -tr&callback_url=http 2897 2681 -su 2833 2587 -shtml 2442 2218 -hurriyet 2030 1907 -search 1915 1904 -tr&user 1556 1494 -jpg 1531 1427 -tr&users 1449 1373 -tr&callback 1294 1244 -jsp 1083 1048 -net%2F63857&secret-oper=reply&id=0&extras] 1054 1054 -htm 957 921 -ru_Mobile=0&at=35&text=производств 927 927 -lv 916 910 -tr&user_page 916 885 -exe 911 891 -me 911 864 -tr&user_page=http 900 868 -do 864 838 -tr&used 782 768 -pro 778 772 +ru 262914 69218 + 92101 89421 +com 63298 30285 +ua 29037 17475 +html 25079 15039 +tr 16770 11857 +net 16387 11686 +php 14374 10307 +yandsearch 12024 9484 +by 8192 6915 +yandex 7211 6124 +org 4890 4514 +kz 4679 4211 +tv 4400 3928 +su 2602 2396 +phtml 2409 2226 +xml 2322 2182 +aspx 1959 1848 +search 1835 1827 +hurriyet 1385 1345 +shtml 995 966 +lv 879 875 +jsp 855 845 +exe 814 798 +pro 737 734 airway 724 724 -biz 685 672 -mail 677 660 -info 593 575 -tr&callback_url=https 534 526 -tr%2Fgaleri 533 522 +me 675 647 +jpg 662 647 +do 625 611 +mail 593 581 +biz 537 530 bstatistik_dlja-dlya-naches 521 521 -sx 498 496 -ru%2Fupload 497 492 -news 492 487 -hu 486 479 
-aspx&referer 473 459 -pogoda 460 460 -auto 438 429 -az 434 425 -net%2F63857&secret=506d9e3dfbd268e6b6630e58 432 432 +info 461 453 +pogoda 459 459 +sx 450 449 +news 448 444 sportlibrary 431 431 -jpg,http 411 397 -tr&callbusiness 410 407 -fm 405 400 -online 401 399 -tr&callbusines 388 384 -ru%2Fnews 387 382 +hu 396 393 +htm 393 385 +fm 379 378 +online 374 372 bstatistic 366 366 -wbp 346 346 -am 336 333 -ru;yandsearch 330 328 -tr&user_page=https 330 328 -tr&callback_url 329 319 -html&lang=ru&lr=110&category=dressages%2Fcs306755 328 328 -pl 328 326 -blog 327 326 -jpg&pos 307 302 -bstana 305 305 -ru;yandex 287 284 -im 283 278 -diary 277 275 -slando 276 274 -eu 274 269 -to 271 269 -asp 253 250 -html&lang 253 248 -mynet 253 251 -tj 242 241 -sberbank 241 238 -haber 234 227 -jpg,https 232 232 -cc 226 221 -_2544 222 222 -ws 221 219 -mamba 220 220 +auto 363 355 +az 356 350 +wbp 343 343 +bstana 304 304 +blog 268 268 +diary 262 261 +am 260 258 +slando 254 252 +im 238 235 +eu 237 234 liveinteria 218 218 -tr%2Fanasayfa 215 210 -tr&user_pts=&states 213 213 -yandsearchplus 212 211 -jpg","photo 211 209 -ru%2Fwww 211 211 -com&callback_url=http 209 208 +to 215 213 +mamba 214 214 auto-supers 208 208 -co 206 205 -kg 206 205 -ru%2Fuploads 206 205 +sberbank 207 207 +tj 205 205 +bstatistik_dlja-dlya_avia 201 201 +bstanii_otryasam 200 200 +pl 200 198 +wroad_5d 200 200 +mynet 191 190 +bstan 187 187 +yandsearchplus 186 186 +haber 184 179 +jpg,https 184 184 +turkasovki 183 183 +co 177 177 +video 177 177 +gif","photos 175 175 +mgshared_zone 172 172 +wssp 172 172 +jpg,http 170 168 +swf 167 167 +cc 166 164 +ws 164 164 +kg 157 156 +mobili_s_probegom 154 153 +cgi 153 152 +yandsearcher 152 151 +uz 150 150 +nsf 149 149 +adriver 147 144 +slandsearch 143 142 +korrez 140 140 +bstatistik_dlja-dlja-putin 139 139 +rambler 133 132 +mvideo 132 132 +asp 129 128 +vc 127 127 +md 121 121 +jpg","photo 119 119 +mp4 118 117 +ee 116 115 +loveplaceOfSearchplus 111 111 +nl 111 111 +bstatistika 107 107 +br 102 102 +sport 99 99 diff --git a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference index a96e3c9f457..364115011f9 100644 --- a/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference +++ b/dbms/tests/queries/1_stateful/00044_any_left_join_string.reference @@ -1,10 +1,10 @@ + 4508153 712428 auto.ru 576845 8935 -yandex.ru 410788 111278 -public 328528 23 - 313516 26015 -public_search 311125 0 +yandex.ru 410776 111278 korer.ru 277987 0 avito.ru 163820 15556 -mail.yandex.ru 152469 1046 -main=hurriyet.com 152096 259 -wot 116912 6682 +mail.yandex.ru 152447 1046 +mail.ru 87949 22225 +best.ru 58537 55 +korablitz.ru 51844 0 +hurpass.com 49671 1251 diff --git a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference index ad9a93d1113..4d0ba2b70f3 100644 --- a/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference +++ b/dbms/tests/queries/1_stateful/00089_position_functions_with_non_constant_arg.reference @@ -2,8 +2,5 @@ 0 0 0 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://игры на передачи пригорька россия&lr=213&rpt=simage&uinfo=ww-1905-wh-643-fw-112-rossiisoft.in.ua%2FKievav@yandex?appkey=506d9e3dfbd268e6b6630e58 -http://ru slovari 15 -https://ru spb.rabota 15 -https://e yandex 12 
+https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri +https://povary_dlya-511-gemotedDynamo_accoshyutoy-s-kortosh@bk.ru/yandsearch?text=simages%2F8%2F10544998#posts%2Fkartofeleri diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index eb67fd52cb8..0c9c82a5e1f 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -70,6 +70,5 @@ RUN apt-get --allow-unauthenticated update -y \ gperf \ alien - COPY build.sh / CMD ["/bin/bash", "/build.sh"] diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 22d0384fd42..7bedd8f7ac9 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -26,7 +26,7 @@ SETTINGS [kafka_row_delimiter = 'delimiter_symbol',] [kafka_schema = '',] [kafka_num_consumers = N,] - [kafka_skip_broken_messages = <0|1>] + [kafka_skip_broken_messages = N] ``` Required parameters: @@ -40,7 +40,7 @@ Optional parameters: - `kafka_row_delimiter` – Delimiter character, which ends the message. - `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. - `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. -- `kafka_skip_broken_messages` – Kafka message parser mode. If `kafka_skip_broken_messages = 1` then the engine skips the Kafka messages that can't be parsed (a message equals a row of data). +- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N`, the engine skips up to *N* Kafka messages per block that cannot be parsed (a message equals a row of data). Examples: @@ -100,6 +100,7 @@ Groups are flexible and synced on the cluster. For instance, if you have 10 topi 3. Create a materialized view that converts data from the engine and puts it into a previously created table. When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`. +One Kafka table can have as many materialized views as you like. They do not read data from the Kafka table directly, but receive new records (in blocks); this way you can write to several tables with different levels of detail (with grouping and aggregation, or without). Example: diff --git a/docs/en/operations/table_engines/merge.md b/docs/en/operations/table_engines/merge.md index 366a5459bf8..f29075ec973 100644 --- a/docs/en/operations/table_engines/merge.md +++ b/docs/en/operations/table_engines/merge.md @@ -27,11 +27,11 @@ Example 2: Let's say you have an old table (WatchLog_old) and decided to change the partitioning without moving data to a new table (WatchLog_new), and you need to see data from both tables.
``` -CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree(date, (UserId, EventType), 8192); INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3); -CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) +CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64) ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192; INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3); @@ -61,7 +61,9 @@ Virtual columns differ from normal columns in the following ways: - They are not selected when using the asterisk (`SELECT *`). - Virtual columns are not shown in `SHOW CREATE TABLE` and `DESC TABLE` queries. -The `Merge` type table contains a virtual `_table` column of the `String` type. (If the table already has a `_table` column, the virtual column is called `_table1`; if you already have `_table1`, it's called `_table2`, and so on.) It contains the name of the table that data was read from. +The `Merge` type table contains the virtual column `_table` of type `String`. It contains the name of the table that data was read from. If any underlying table already has the column `_table`, then the virtual column is shadowed and is not accessible. + + If the `WHERE/PREWHERE` clause contains conditions for the `_table` column that do not depend on other table columns (as one of the conjunction elements, or as an entire expression), these conditions are used as an index. The conditions are performed on a data set of table names to read data from, and the read operation will be performed from only those tables that the condition was triggered on. diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 71df498d994..fb02a13c3a0 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -108,7 +108,7 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramDistanceC ## ngramSearch(haystack, needle) -Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Can be useful for fuzzy string search. +Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` -- the number of n-grams from needle minus the common number of n-grams, normalized by the number of `needle` n-grams. The closer to one, the more likely `needle` is in the `haystack`. Can be useful for fuzzy string search. For case-insensitive search and/or in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. diff --git a/docs/en/query_language/functions/url_functions.md b/docs/en/query_language/functions/url_functions.md index 19b12bd5b21..93edf705e7e 100644 --- a/docs/en/query_language/functions/url_functions.md +++ b/docs/en/query_language/functions/url_functions.md @@ -12,7 +12,7 @@ Returns the protocol. Examples: http, ftp, mailto, magnet... ### domain -Gets the domain. +Gets the domain. The scheme is cut off only if it is no longer than 16 bytes.
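The one-line `domain` description above is terse, so here is a hedged illustration of what it implies (assuming a local server; the outputs follow from the description rather than from this diff's test suite):

```bash
# The scheme (http://, https://, ...) is stripped before the domain is
# extracted, but only schemes of at most 16 bytes are recognized.
clickhouse-client -q "SELECT domain('http://yandex.ru/path?query=1')"
# yandex.ru

# The related protocol() function from the same page returns the scheme.
clickhouse-client -q "SELECT protocol('http://yandex.ru/path?query=1')"
# http
```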
### domainWithoutWWW diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/operations/table_engines/kafka.md index bdbc13e171a..3fe2e4d5cba 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/operations/table_engines/kafka.md @@ -97,6 +97,7 @@ Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format 3. Создайте материализованное представление, которое преобразует данные от движка и помещает их в ранее созданную таблицу. Когда к движку присоединяется материализованное представление (`MATERIALIZED VIEW`), оно начинает в фоновом режиме собирать данные. Это позволяет непрерывно получать сообщения от Kafka и преобразовывать их в необходимый формат с помощью `SELECT`. +Материализованных представлений у одной kafka таблицы может быть сколько угодно. Они не считывают данные из таблицы kafka непосредственно, а получают новые записи (блоками); таким образом можно писать в несколько таблиц с разным уровнем детализации (с группировкой и агрегацией или без них). Пример: diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 66759e2bd3b..33092cf804b 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -152,4 +152,35 @@ SELECT geohashDecode('ezs42') AS res └─────────────────────────────────┘ ``` +## geoToH3 + +Получает H3 индекс точки (lon, lat) с заданным разрешением. + +``` +geoToH3(lon, lat, resolution) +``` + +**Входные значения** + +- `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). +- `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. + +**Возвращаемые значения** + +Возвращает значение с типом [UInt64](../../data_types/int_uint.md). +`0` в случае ошибки. +Иначе возвращается индексный номер шестиугольника. + +**Пример** + +``` sql +SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index +``` +``` +┌────────────h3Index─┐ │ 644325524701193974 │ └────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/geo/) diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 0301b094c86..7e94e378814 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -97,7 +97,7 @@ ## ngramSearch(haystack, needle) -То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Может быть использовано для приближенного поиска. +То же, что и `ngramDistance`, но вычисляет несимметричную разность между `needle` и `haystack` -- количество n-грамм из `needle` минус количество общих n-грамм, нормированное на количество n-грамм из `needle`. Чем ближе результат к единице, тем вероятнее, что `needle` внутри `haystack`. Может быть использовано для приближенного поиска. Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
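To make the `ngramSearch` semantics described above concrete, a short hedged sketch (function names come from the documentation in this diff; exact scores depend on the n-gram statistics of the inputs, so none are claimed here):

```bash
# Non-symmetric measure: roughly, how much of the needle's n-grams are
# found in the haystack. A result close to 1 suggests the needle is
# contained in the haystack; close to 0 suggests it is not.
clickhouse-client -q "SELECT ngramSearch('http://metric.ru/', 'metric')"

# Case-insensitive / UTF-8 variants, relevant for non-ASCII data such as
# the Russian strings in the 00951 test reference above.
clickhouse-client -q "SELECT ngramSearchCaseInsensitiveUTF8('ПрИвЕт', 'привет')"
```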
diff --git a/docs/ru/query_language/functions/url_functions.md b/docs/ru/query_language/functions/url_functions.md index 4b4fdc9adda..1897d1b28a3 100644 --- a/docs/ru/query_language/functions/url_functions.md +++ b/docs/ru/query_language/functions/url_functions.md @@ -10,7 +10,7 @@ Возвращает протокол. Примеры: http, ftp, mailto, magnet... ### domain -Возвращает домен. +Возвращает домен. Отсекает схему размером не более 16 байт. ### domainWithoutWWW Возвращает домен, удалив не более одного 'www.' с начала, если есть. diff --git a/libs/libcommon/include/common/unaligned.h b/libs/libcommon/include/common/unaligned.h index 2b1505ba2d3..ca73298adfb 100644 --- a/libs/libcommon/include/common/unaligned.h +++ b/libs/libcommon/include/common/unaligned.h @@ -1,6 +1,7 @@ #pragma once #include <string.h> +#include <type_traits> template <typename T> @@ -11,8 +12,14 @@ inline T unalignedLoad(const void * address) return res; } +/// We've had troubles before with wrong store size due to integral promotions +/// (e.g., unalignedStore(dest, uint16_t + uint16_t) stores an uint32_t). +/// To prevent this, make the caller specify the stored type explicitly. +/// To disable deduction of T, wrap the argument type with std::enable_if. template <typename T> -inline void unalignedStore(void * address, const T & src) +inline void unalignedStore(void * address, + const typename std::enable_if<true, T>::type & src) { + static_assert(std::is_trivially_copyable_v<T>); memcpy(address, &src, sizeof(src)); } diff --git a/libs/libcommon/src/DateLUT.cpp b/libs/libcommon/src/DateLUT.cpp index 66ca8e6d201..cac38634a26 100644 --- a/libs/libcommon/src/DateLUT.cpp +++ b/libs/libcommon/src/DateLUT.cpp @@ -13,12 +13,12 @@ Poco::DigestEngine::Digest calcSHA1(const std::string & path) { std::ifstream stream(path); if (!stream) - throw Poco::Exception("Error while opening file: `" + path + "'."); + throw Poco::Exception("Error while opening file: '" + path + "'."); Poco::SHA1Engine digest_engine; Poco::DigestInputStream digest_stream(digest_engine, stream); digest_stream.ignore(std::numeric_limits<std::streamsize>::max()); if (!stream.eof()) - throw Poco::Exception("Error while reading file: `" + path + "'."); + throw Poco::Exception("Error while reading file: '" + path + "'."); return digest_engine.digest(); } @@ -39,7 +39,7 @@ std::string determineDefaultTimeZone() if (tz_env_var) { - error_prefix = std::string("Could not determine time zone from TZ variable value: `") + tz_env_var + "': "; + error_prefix = std::string("Could not determine time zone from TZ variable value: '") + tz_env_var + "': "; if (*tz_env_var == ':') ++tz_env_var;