From 1a3cef8216d859f129659b4a68cae698b9d6517a Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 2 Feb 2019 16:33:50 +0300 Subject: [PATCH 01/79] Add brotli to contrib --- .gitmodules | 3 +++ CMakeLists.txt | 1 + cmake/find_brotli.cmake | 23 +++++++++++++++++++++ contrib/CMakeLists.txt | 4 ++++ contrib/brotli | 1 + contrib/brotli-cmake/CMakeLists.txt | 32 +++++++++++++++++++++++++++++ dbms/CMakeLists.txt | 5 +++++ 7 files changed, 69 insertions(+) create mode 100644 cmake/find_brotli.cmake create mode 160000 contrib/brotli create mode 100644 contrib/brotli-cmake/CMakeLists.txt diff --git a/.gitmodules b/.gitmodules index 24211b6707e..47a7a0cf31c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git +[submodule "contrib/brotli"] + path = contrib/brotli + url = https://github.com/google/brotli.git diff --git a/CMakeLists.txt b/CMakeLists.txt index e75eecc4e6d..f2d4f668883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,6 +252,7 @@ if (NOT USE_CPUID) endif() include (cmake/find_libgsasl.cmake) include (cmake/find_libxml2.cmake) +include (cmake/find_brotli.cmake) include (cmake/find_protobuf.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) diff --git a/cmake/find_brotli.cmake b/cmake/find_brotli.cmake new file mode 100644 index 00000000000..66ef4498da0 --- /dev/null +++ b/cmake/find_brotli.cmake @@ -0,0 +1,23 @@ +option (USE_INTERNAL_BROTLI_LIBRARY "Set to FALSE to use system libbrotli library instead of bundled" ${NOT_UNBUNDLED}) + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h") + if (USE_INTERNAL_BROTLI_LIBRARY) + message (WARNING "submodule contrib/brotli is missing. 
to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_BROTLI_LIBRARY 0) + endif () + set (MISSING_INTERNAL_BROTLI_LIBRARY 1) +endif () + +if (NOT USE_INTERNAL_BROTLI_LIBRARY) + find_library (BROTLI_LIBRARY libbrotli) + find_path (BROTLI_INCLUDE_DIR NAMES decode.h encode.h port.h types.h PATHS ${BROTLI_INCLUDE_PATHS}) +endif () + +if (BROTLI_LIBRARY AND BROTLI_INCLUDE_DIR) +elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY) + set (BROTLI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libbrotli/c/include) + set (USE_INTERNAL_BROTLI_LIBRARY 1) + set (BROTLI_LIBRARY libbrotli) +endif () + +message (STATUS "Using brotli: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fcc2cc75817..fa111688a4e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -215,6 +215,10 @@ if (USE_INTERNAL_LIBXML2_LIBRARY) add_subdirectory(libxml2-cmake) endif () +if (USE_INTERNAL_LIBBROTLI_LIBRARY) + add_subdirectory(libbrotli-cmake) +endif () + if (USE_INTERNAL_PROTOBUF_LIBRARY) set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) set(protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) diff --git a/contrib/brotli b/contrib/brotli new file mode 160000 index 00000000000..5805f99a533 --- /dev/null +++ b/contrib/brotli @@ -0,0 +1 @@ +Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65 diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e9ad40991df --- /dev/null +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -0,0 +1,32 @@ +set(BROTLI_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/brotli) +set(BROTLI_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/brotli) + +set(SRCS + ${BROTLI_SOURCE_DIR}/dec/bit_reader.c + ${BROTLI_SOURCE_DIR}/dec/state.c + ${BROTLI_SOURCE_DIR}/dec/huffman.c + ${BROTLI_SOURCE_DIR}/dec/decode.c + ${BROTLI_SOURCE_DIR}/enc/encode.c + ${BROTLI_SOURCE_DIR}/enc/dictionary_hash.c + ${BROTLI_SOURCE_DIR}/enc/cluster.c + ${BROTLI_SOURCE_DIR}/enc/entropy_encode.c + ${BROTLI_SOURCE_DIR}/enc/literal_cost.c + ${BROTLI_SOURCE_DIR}/enc/compress_fragment_two_pass.c + ${BROTLI_SOURCE_DIR}/enc/static_dict.c + ${BROTLI_SOURCE_DIR}/enc/compress_fragment.c + ${BROTLI_SOURCE_DIR}/enc/block_splitter.c + ${BROTLI_SOURCE_DIR}/enc/backward_references_hq.c + ${BROTLI_SOURCE_DIR}/enc/histogram.c + ${BROTLI_SOURCE_DIR}/enc/brotli_bit_stream.c + ${BROTLI_SOURCE_DIR}/enc/utf8_util.c + ${BROTLI_SOURCE_DIR}/enc/encoder_dict.c + ${BROTLI_SOURCE_DIR}/enc/backward_references.c + ${BROTLI_SOURCE_DIR}/enc/bit_cost.c + ${BROTLI_SOURCE_DIR}/enc/metablock.c + ${BROTLI_SOURCE_DIR}/enc/memory.c + ${BROTLI_SOURCE_DIR}/common/dictionary.c + ${BROTLI_SOURCE_DIR}/common/transform.c +) +add_library(libbrotli STATIC ${SRCS}) + +target_include_directories(libbrotli PUBLIC ${BROTLI_SOURCE_DIR}/c/include) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 8853ee1b960..3a6dacdb01a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -314,6 +314,11 @@ if (USE_HDFS) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR}) endif() +if (USE_BROTLI) + target_link_libraries (clickhouse_common_io PRIVATE ${BROTLI_LIBRARY}) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BROTLI_INCLUDE_DIR}) +endif() + if (USE_JEMALLOC) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp endif () From 708e06837acdc9f23baeb32bf192a289b56d18ae Mon Sep 17 00:00:00 2001 
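
Note on the patch above: it only vendors the brotli sources and links the resulting static library into clickhouse_common_io; nothing calls the API yet. As a quick, self-contained sanity check of what the bundled library provides, here is a round trip through brotli's one-shot C API. This is not part of the patch; the buffer sizes and the BROTLI_MODE_TEXT hint are arbitrary choices, and the program links against the brotli encoder and decoder libraries.

#include <brotli/decode.h>
#include <brotli/encode.h>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

int main()
{
    const std::string input = "SELECT 1\n";

    /// Compress with the default quality/window; BROTLI_MODE_TEXT hints that the payload is text.
    std::vector<uint8_t> compressed(BrotliEncoderMaxCompressedSize(input.size()));
    size_t compressed_size = compressed.size();
    int ok = BrotliEncoderCompress(
        BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW, BROTLI_MODE_TEXT,
        input.size(), reinterpret_cast<const uint8_t *>(input.data()),
        &compressed_size, compressed.data());
    assert(ok == BROTLI_TRUE);

    /// Decompress in one call; the output buffer must be large enough up front.
    std::vector<uint8_t> decompressed(input.size());
    size_t decompressed_size = decompressed.size();
    BrotliDecoderResult res = BrotliDecoderDecompress(
        compressed_size, compressed.data(), &decompressed_size, decompressed.data());
    assert(res == BROTLI_DECODER_RESULT_SUCCESS);
    assert(decompressed_size == input.size());
    assert(std::memcmp(decompressed.data(), input.data(), input.size()) == 0);
    return 0;
}
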
From: alesapin Date: Sat, 2 Feb 2019 17:17:51 +0300 Subject: [PATCH 02/79] Fix brotli build --- cmake/find_brotli.cmake | 7 ++++--- contrib/CMakeLists.txt | 4 ++-- contrib/brotli-cmake/CMakeLists.txt | 9 +++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cmake/find_brotli.cmake b/cmake/find_brotli.cmake index 66ef4498da0..ec7309cf362 100644 --- a/cmake/find_brotli.cmake +++ b/cmake/find_brotli.cmake @@ -9,15 +9,16 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode. endif () if (NOT USE_INTERNAL_BROTLI_LIBRARY) - find_library (BROTLI_LIBRARY libbrotli) + find_library (BROTLI_LIBRARY brotli) find_path (BROTLI_INCLUDE_DIR NAMES decode.h encode.h port.h types.h PATHS ${BROTLI_INCLUDE_PATHS}) endif () if (BROTLI_LIBRARY AND BROTLI_INCLUDE_DIR) elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY) - set (BROTLI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libbrotli/c/include) + set (BROTLI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include) set (USE_INTERNAL_BROTLI_LIBRARY 1) - set (BROTLI_LIBRARY libbrotli) + set (BROTLI_LIBRARY brotli) + set (USE_BROTLI 1) endif () message (STATUS "Using brotli: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index fa111688a4e..bfd77c8a4c6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -215,8 +215,8 @@ if (USE_INTERNAL_LIBXML2_LIBRARY) add_subdirectory(libxml2-cmake) endif () -if (USE_INTERNAL_LIBBROTLI_LIBRARY) - add_subdirectory(libbrotli-cmake) +if (USE_INTERNAL_BROTLI_LIBRARY) + add_subdirectory(brotli-cmake) endif () if (USE_INTERNAL_PROTOBUF_LIBRARY) diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index e9ad40991df..0922435dd8e 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ -set(BROTLI_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/brotli) -set(BROTLI_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/brotli) +set(BROTLI_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/brotli/c) +set(BROTLI_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/brotli/c) set(SRCS ${BROTLI_SOURCE_DIR}/dec/bit_reader.c @@ -27,6 +27,7 @@ set(SRCS ${BROTLI_SOURCE_DIR}/common/dictionary.c ${BROTLI_SOURCE_DIR}/common/transform.c ) -add_library(libbrotli STATIC ${SRCS}) -target_include_directories(libbrotli PUBLIC ${BROTLI_SOURCE_DIR}/c/include) +add_library(brotli STATIC ${SRCS}) + +target_include_directories(brotli PUBLIC ${BROTLI_SOURCE_DIR}/include) From 5cbe55820539aa0502237f4e308e6cc44e51041f Mon Sep 17 00:00:00 2001 From: proller Date: Sun, 3 Feb 2019 17:13:47 +0300 Subject: [PATCH 03/79] Update CMakeLists.txt --- contrib/brotli-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index 0922435dd8e..6e5fb2d22ae 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -28,6 +28,6 @@ set(SRCS ${BROTLI_SOURCE_DIR}/common/transform.c ) -add_library(brotli STATIC ${SRCS}) +add_library(brotli ${LINK_MODE} ${SRCS}) target_include_directories(brotli PUBLIC ${BROTLI_SOURCE_DIR}/include) From 847b8757605ffe097e93ac07d19321f027fc4fa0 Mon Sep 17 00:00:00 2001 From: proller Date: Sun, 3 Feb 2019 17:14:30 +0300 Subject: [PATCH 04/79] Update find_brotli.cmake --- cmake/find_brotli.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/find_brotli.cmake b/cmake/find_brotli.cmake index ec7309cf362..34f5845a346 100644 --- 
a/cmake/find_brotli.cmake +++ b/cmake/find_brotli.cmake @@ -14,6 +14,7 @@ if (NOT USE_INTERNAL_BROTLI_LIBRARY) endif () if (BROTLI_LIBRARY AND BROTLI_INCLUDE_DIR) + set (USE_BROTLI 1) elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY) set (BROTLI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include) set (USE_INTERNAL_BROTLI_LIBRARY 1) @@ -21,4 +22,4 @@ elseif (NOT MISSING_INTERNAL_BROTLI_LIBRARY) set (USE_BROTLI 1) endif () -message (STATUS "Using brotli: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") +message (STATUS "Using brotli=${USE_BROTLI}: ${BROTLI_INCLUDE_DIR} : ${BROTLI_LIBRARY}") From 7e9e34bebf1afdd65263f3ac67c991b56c231ddf Mon Sep 17 00:00:00 2001 From: Mikhail Fandyushin Date: Tue, 5 Feb 2019 01:15:08 +0300 Subject: [PATCH 05/79] WIP: Reading brotli encoded HTTP messages. First-draft-ugly implementation, but its works --- dbms/CMakeLists.txt | 3 ++ dbms/programs/server/HTTPHandler.cpp | 31 +++++++++++++-------- dbms/src/IO/BrotliReadBuffer.cpp | 41 ++++++++++++++++++++++++++++ dbms/src/IO/BrotliReadBuffer.h | 27 ++++++++++++++++++ 4 files changed, 90 insertions(+), 12 deletions(-) create mode 100644 dbms/src/IO/BrotliReadBuffer.cpp create mode 100644 dbms/src/IO/BrotliReadBuffer.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index ec69a4389a7..a86de61827b 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -89,6 +89,9 @@ set(dbms_sources) include(../cmake/dbms_glob_sources.cmake) +# temp ugly hack +include_directories(${BROTLI_INCLUDE_DIR}) + add_headers_and_sources(clickhouse_common_io src/Common) add_headers_and_sources(clickhouse_common_io src/Common/HashTable) add_headers_and_sources(clickhouse_common_io src/IO) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index f75f801cf27..2d531b3840f 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -396,19 +397,25 @@ void HTTPHandler::processQuery( String http_request_compression_method_str = request.get("Content-Encoding", ""); if (!http_request_compression_method_str.empty()) { - ZlibCompressionMethod method; - if (http_request_compression_method_str == "gzip") - { - method = ZlibCompressionMethod::Gzip; - } - else if (http_request_compression_method_str == "deflate") - { - method = ZlibCompressionMethod::Zlib; - } - else + do { + if (http_request_compression_method_str == "gzip") + { + in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Gzip); + break; + } + if (http_request_compression_method_str == "deflate") + { + in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Zlib); + break; + } + if (http_request_compression_method_str == "br") + { + in_post = std::make_unique(*in_post_raw); + break; + } throw Exception("Unknown Content-Encoding of HTTP request: " + http_request_compression_method_str, - ErrorCodes::UNKNOWN_COMPRESSION_METHOD); - in_post = std::make_unique(*in_post_raw, method); + ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + } while(0); } else in_post = std::move(in_post_raw); diff --git a/dbms/src/IO/BrotliReadBuffer.cpp b/dbms/src/IO/BrotliReadBuffer.cpp new file mode 100644 index 00000000000..f67e9ffc78d --- /dev/null +++ b/dbms/src/IO/BrotliReadBuffer.cpp @@ -0,0 +1,41 @@ +#include "BrotliReadBuffer.h" + +namespace DB +{ +BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment) + : BufferWithOwnMemory(buf_size, existing_memory, alignment) + , in(in_) + , 
eof(false) +{ + bstate_ = BrotliDecoderCreateInstance(NULL,NULL,NULL); +} + +BrotliReadBuffer::~BrotliReadBuffer() +{ + BrotliDecoderDestroyInstance(bstate_); +} + +bool BrotliReadBuffer::nextImpl() +{ + if (eof) + return false; + + auto ptr_in = reinterpret_cast(in.position()); + size_t size_in = in.buffer().end() - in.position(); + + auto ptr_out = reinterpret_cast(internal_buffer.begin()); + size_t size_out = internal_buffer.size(); + + BrotliDecoderDecompressStream(bstate_, &size_in, &ptr_in, &size_out, &ptr_out, nullptr); + + in.position() = in.buffer().end() - size_in; + working_buffer.resize(internal_buffer.size() - size_out); + + if (in.eof()) { + eof = true; + return working_buffer.size() != 0; + } + + return true; +} +} \ No newline at end of file diff --git a/dbms/src/IO/BrotliReadBuffer.h b/dbms/src/IO/BrotliReadBuffer.h new file mode 100644 index 00000000000..cc06587add4 --- /dev/null +++ b/dbms/src/IO/BrotliReadBuffer.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + +#include + +namespace DB { + +class BrotliReadBuffer : public BufferWithOwnMemory { +public: + BrotliReadBuffer( + ReadBuffer &in_, + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, + char *existing_memory = nullptr, + size_t alignment = 0); + + ~BrotliReadBuffer() override; + +private: + bool nextImpl() override; + + ReadBuffer ∈ + BrotliDecoderState * bstate_; + bool eof; +}; +} \ No newline at end of file From 0f9f63825199b40af19f3c50bd453bd7054e1fa3 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Tue, 5 Feb 2019 12:12:52 +0300 Subject: [PATCH 06/79] Update .gitmodules --- .gitmodules | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitmodules b/.gitmodules index ec0adb71427..a4a570d20d8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -66,6 +66,7 @@ url = https://github.com/mfontanini/cppkafka.git [submodule "contrib/pdqsort"] path = contrib/pdqsort + url = https://github.com/orlp/pdqsort [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git From 1ee18203aec85a7ee730c75216d500ca06f8d90b Mon Sep 17 00:00:00 2001 From: Mikhail Fandyushin Date: Wed, 6 Feb 2019 02:12:31 +0300 Subject: [PATCH 07/79] handle brotli errors; working wersion of BrotliReadBuffer --- dbms/programs/server/HTTPHandler.cpp | 34 +++++++++---------- dbms/src/IO/BrotliReadBuffer.cpp | 51 +++++++++++++++++++++------- dbms/src/IO/BrotliReadBuffer.h | 17 ++++++++-- 3 files changed, 69 insertions(+), 33 deletions(-) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index 2d531b3840f..8c69573dd36 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -397,25 +397,23 @@ void HTTPHandler::processQuery( String http_request_compression_method_str = request.get("Content-Encoding", ""); if (!http_request_compression_method_str.empty()) { - do { - if (http_request_compression_method_str == "gzip") - { - in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Gzip); - break; - } - if (http_request_compression_method_str == "deflate") - { - in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Zlib); - break; - } - if (http_request_compression_method_str == "br") - { - in_post = std::make_unique(*in_post_raw); - break; - } + if (http_request_compression_method_str == "gzip") + { + in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Gzip); + } + else if (http_request_compression_method_str == "deflate") + { + in_post = std::make_unique(*in_post_raw, ZlibCompressionMethod::Zlib); + } + else if 
(http_request_compression_method_str == "br") + { + in_post = std::make_unique(*in_post_raw); + } + else + { throw Exception("Unknown Content-Encoding of HTTP request: " + http_request_compression_method_str, - ErrorCodes::UNKNOWN_COMPRESSION_METHOD); - } while(0); + ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + } } else in_post = std::move(in_post_raw); diff --git a/dbms/src/IO/BrotliReadBuffer.cpp b/dbms/src/IO/BrotliReadBuffer.cpp index f67e9ffc78d..3ddd2cb165e 100644 --- a/dbms/src/IO/BrotliReadBuffer.cpp +++ b/dbms/src/IO/BrotliReadBuffer.cpp @@ -5,14 +5,19 @@ namespace DB BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment) : BufferWithOwnMemory(buf_size, existing_memory, alignment) , in(in_) + , bstate(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr)) + , bresult(BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) + , in_available(0) + , in_data(nullptr) + , out_capacity(0) + , out_data(nullptr) , eof(false) { - bstate_ = BrotliDecoderCreateInstance(NULL,NULL,NULL); } BrotliReadBuffer::~BrotliReadBuffer() { - BrotliDecoderDestroyInstance(bstate_); + BrotliDecoderDestroyInstance(bstate); } bool BrotliReadBuffer::nextImpl() @@ -20,20 +25,42 @@ bool BrotliReadBuffer::nextImpl() if (eof) return false; - auto ptr_in = reinterpret_cast(in.position()); - size_t size_in = in.buffer().end() - in.position(); + if (!in_available) + { + in.nextIfAtEnd(); + in_available = in.buffer().end() - in.position(); + in_data = reinterpret_cast(in.position()); + } - auto ptr_out = reinterpret_cast(internal_buffer.begin()); - size_t size_out = internal_buffer.size(); + if (bresult == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof())) + { + throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA); + } - BrotliDecoderDecompressStream(bstate_, &size_in, &ptr_in, &size_out, &ptr_out, nullptr); + out_capacity = internal_buffer.size(); + out_data = reinterpret_cast(internal_buffer.begin()); - in.position() = in.buffer().end() - size_in; - working_buffer.resize(internal_buffer.size() - size_out); + bresult = BrotliDecoderDecompressStream(bstate, &in_available, &in_data, &out_capacity, &out_data, nullptr); - if (in.eof()) { - eof = true; - return working_buffer.size() != 0; + in.position() = in.buffer().end() - in_available; + working_buffer.resize(internal_buffer.size() - out_capacity); + + if (bresult == BROTLI_DECODER_RESULT_SUCCESS) + { + if (in.eof()) + { + eof = true; + return working_buffer.size() != 0; + } + else + { + throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA); + } + } + + if (bresult == BROTLI_DECODER_RESULT_ERROR) + { + throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA); } return true; diff --git a/dbms/src/IO/BrotliReadBuffer.h b/dbms/src/IO/BrotliReadBuffer.h index cc06587add4..400d343984c 100644 --- a/dbms/src/IO/BrotliReadBuffer.h +++ b/dbms/src/IO/BrotliReadBuffer.h @@ -5,9 +5,11 @@ #include -namespace DB { +namespace DB +{ -class BrotliReadBuffer : public BufferWithOwnMemory { +class BrotliReadBuffer : public BufferWithOwnMemory +{ public: BrotliReadBuffer( ReadBuffer &in_, @@ -21,7 +23,16 @@ private: bool nextImpl() override; ReadBuffer ∈ - BrotliDecoderState * bstate_; + + BrotliDecoderState * bstate; + BrotliDecoderResult bresult; + + size_t in_available; + const uint8_t * in_data; + + size_t out_capacity; + uint8_t * out_data; + bool eof; }; } \ No newline at end of file From 60cd76662339dc5f62da1f0452e571d43fc1ca05 Mon 
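
The reworked BrotliReadBuffer::nextImpl above drives BrotliDecoderDecompressStream one output buffer at a time, starts from the same BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT state, and treats a decoder that still wants input at end of stream as an error. The same control flow, stripped of the ClickHouse ReadBuffer machinery, looks roughly like this sketch over plain iostreams; the 8 KiB chunk size is an arbitrary choice and the error type is a stand-in for DB::Exception.

#include <brotli/decode.h>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

/// Decompress a brotli stream from `in` to `out` in fixed-size chunks.
void decompressStream(std::istream & in, std::ostream & out)
{
    BrotliDecoderState * state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
    if (!state)
        throw std::runtime_error("cannot create brotli decoder");

    std::vector<uint8_t> in_buf(8192), out_buf(8192);
    size_t available_in = 0;
    const uint8_t * next_in = nullptr;
    BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;

    while (true)
    {
        if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT)
        {
            in.read(reinterpret_cast<char *>(in_buf.data()), in_buf.size());
            available_in = static_cast<size_t>(in.gcount());
            next_in = in_buf.data();
            if (available_in == 0)   /// decoder wants more input but the stream is exhausted
            {
                BrotliDecoderDestroyInstance(state);
                throw std::runtime_error("brotli decode error: unexpected end of stream");
            }
        }

        size_t available_out = out_buf.size();
        uint8_t * next_out = out_buf.data();
        result = BrotliDecoderDecompressStream(state, &available_in, &next_in, &available_out, &next_out, nullptr);

        out.write(reinterpret_cast<const char *>(out_buf.data()), out_buf.size() - available_out);

        if (result == BROTLI_DECODER_RESULT_SUCCESS)
            break;
        if (result == BROTLI_DECODER_RESULT_ERROR)
        {
            BrotliDecoderDestroyInstance(state);
            throw std::runtime_error("brotli decode error");
        }
        /// NEEDS_MORE_OUTPUT: loop again with a fresh output buffer;
        /// NEEDS_MORE_INPUT: the top of the loop refills the input buffer.
    }

    BrotliDecoderDestroyInstance(state);
}
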
Sep 17 00:00:00 2001 From: Mihail Fandyushin Date: Wed, 6 Feb 2019 09:05:41 +0300 Subject: [PATCH 08/79] fixed clang build; added new line --- dbms/src/IO/BrotliReadBuffer.cpp | 3 ++- dbms/src/IO/BrotliReadBuffer.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/IO/BrotliReadBuffer.cpp b/dbms/src/IO/BrotliReadBuffer.cpp index 3ddd2cb165e..70bf0a56dd8 100644 --- a/dbms/src/IO/BrotliReadBuffer.cpp +++ b/dbms/src/IO/BrotliReadBuffer.cpp @@ -65,4 +65,5 @@ bool BrotliReadBuffer::nextImpl() return true; } -} \ No newline at end of file +} + diff --git a/dbms/src/IO/BrotliReadBuffer.h b/dbms/src/IO/BrotliReadBuffer.h index 400d343984c..0ba910a69ea 100644 --- a/dbms/src/IO/BrotliReadBuffer.h +++ b/dbms/src/IO/BrotliReadBuffer.h @@ -35,4 +35,5 @@ private: bool eof; }; -} \ No newline at end of file +} + From 47d0509f3df1649a7afded7a9e8772f7662fbba5 Mon Sep 17 00:00:00 2001 From: Mikhail Fandyushin Date: Mon, 11 Feb 2019 00:52:26 +0300 Subject: [PATCH 09/79] added tests for brotli compression --- dbms/tests/queries/0_stateless/00302_http_compression.reference | 2 ++ dbms/tests/queries/0_stateless/00302_http_compression.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00302_http_compression.reference b/dbms/tests/queries/0_stateless/00302_http_compression.reference index 97e6f753567..a572b69a989 100644 --- a/dbms/tests/queries/0_stateless/00302_http_compression.reference +++ b/dbms/tests/queries/0_stateless/00302_http_compression.reference @@ -53,6 +53,8 @@ < Content-Encoding: gzip 1 1 +1 +Hello, world Hello, world Hello, world 0 diff --git a/dbms/tests/queries/0_stateless/00302_http_compression.sh b/dbms/tests/queries/0_stateless/00302_http_compression.sh index bd066fd1142..1b47312ce5f 100755 --- a/dbms/tests/queries/0_stateless/00302_http_compression.sh +++ b/dbms/tests/queries/0_stateless/00302_http_compression.sh @@ -21,9 +21,11 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept echo "SELECT 1" | ${CLICKHOUSE_CURL} -sS --data-binary @- ${CLICKHOUSE_URL}; echo "SELECT 1" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' ${CLICKHOUSE_URL}; +echo "SELECT 1" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' ${CLICKHOUSE_URL}; echo "'Hello, world'" | ${CLICKHOUSE_CURL} -sS --data-binary @- "${CLICKHOUSE_URL}?query=SELECT"; echo "'Hello, world'" | gzip -c | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: gzip' "${CLICKHOUSE_URL}?query=SELECT"; +echo "'Hello, world'" | brotli | ${CLICKHOUSE_CURL} -sS --data-binary @- -H 'Content-Encoding: br' "${CLICKHOUSE_URL}?query=SELECT"; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}?enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 0' | wc -c; From 9787b3a1eeae3feefcdd53f5a4b7a5f831e18579 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 11 Feb 2019 14:19:56 +0300 Subject: [PATCH 10/79] Added Field type AggregateFunctionState. 
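
The commit below stores serialized aggregate-function state inside Field behind a strong typedef of String (the diff uses STRONG_TYPEDEF and toUnderType(), whose definitions are not shown in this series). A minimal sketch of the idea, with guessed names and not ClickHouse's actual macro: the payload keeps the representation of a String but becomes a distinct C++ type, so visitors and overloads cannot confuse a serialized state with an ordinary string value.

#include <string>
#include <utility>

/// Minimal strong typedef: same representation as T, but a distinct C++ type.
/// (A guessed sketch; not the STRONG_TYPEDEF macro from the ClickHouse sources.)
template <typename T, typename Tag>
struct StrongTypedef
{
    T value;

    StrongTypedef() = default;
    explicit StrongTypedef(T v) : value(std::move(v)) {}

    T & toUnderType() { return value; }
    const T & toUnderType() const { return value; }

    bool operator==(const StrongTypedef & rhs) const { return value == rhs.value; }
    bool operator<(const StrongTypedef & rhs) const { return value < rhs.value; }
};

/// Serialized aggregate state: a String under the hood, a separate type for overload resolution.
using AggregateFunctionStateData = StrongTypedef<std::string, struct AggregateFunctionStateTag>;

/// Overloads (or visitor operator()s, as in FieldVisitors.h) can now tell the two apart:
inline const char * describe(const std::string &) { return "plain String"; }
inline const char * describe(const AggregateFunctionStateData &) { return "serialized aggregate function state"; }
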
--- dbms/src/Columns/ColumnAggregateFunction.cpp | 12 ++++---- dbms/src/Common/FieldVisitors.cpp | 15 ++++++++++ dbms/src/Common/FieldVisitors.h | 29 ++++++++++++++++++++ dbms/src/Core/Field.cpp | 24 ++++++++++++++++ dbms/src/Core/Field.h | 18 +++++++++++- 5 files changed, 91 insertions(+), 7 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 7d3e001998a..ae72d263a15 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -260,9 +260,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const Field ColumnAggregateFunction::operator[](size_t n) const { - Field field = String(); + Field field = AggregateFunctionStateData(); { - WriteBufferFromString buffer(field.get()); + WriteBufferFromString buffer(field.get().toUnderType()); func->serialize(data[n], buffer); } return field; @@ -272,7 +272,7 @@ void ColumnAggregateFunction::get(size_t n, Field & res) const { res = String(); { - WriteBufferFromString buffer(res.get()); + WriteBufferFromString buffer(res.get().toUnderType()); func->serialize(data[n], buffer); } } @@ -343,7 +343,7 @@ void ColumnAggregateFunction::insert(const Field & x) ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get()); + ReadBufferFromString read_buffer(x.get().toUnderType()); func->deserialize(data.back(), read_buffer, &arena); } @@ -465,12 +465,12 @@ void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const AlignedBuffer place_buffer(func->sizeOfData(), func->alignOfData()); AggregateDataPtr place = place_buffer.data(); - String serialized; + AggregateFunctionStateData serialized; func->create(place); try { - WriteBufferFromString buffer(serialized); + WriteBufferFromString buffer(serialized.toUnderType()); func->serialize(place, buffer); } catch (...) diff --git a/dbms/src/Common/FieldVisitors.cpp b/dbms/src/Common/FieldVisitors.cpp index e2b855c8f83..e10095fed90 100644 --- a/dbms/src/Common/FieldVisitors.cpp +++ b/dbms/src/Common/FieldVisitors.cpp @@ -89,6 +89,12 @@ String FieldVisitorDump::operator() (const Tuple & x_def) const return wb.str(); } +String FieldVisitorDump::operator() (const AggregateFunctionStateData & x) const +{ + WriteBufferFromOwnString wb; + writeQuoted(x, wb); + return wb.str(); +} /** In contrast to writeFloatText (and writeQuoted), * even if number looks like integer after formatting, prints decimal point nevertheless (for example, Float64(1) is printed as 1.). 
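
Field itself is a hand-rolled discriminated union (an enum tag plus aligned storage, as the surrounding diff shows), and every FieldVisitor* gains one more operator() for the new alternative. As an analogy only, not how DB::Field is implemented, here is the same shape written with std::variant and std::visit; the AggregateFunctionStateData struct below already uses the name-plus-data layout that PATCH 12 later in this series introduces.

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

struct Null {};
struct AggregateFunctionStateData { std::string name; std::string data; };

using Field = std::variant<Null, uint64_t, int64_t, double, std::string, AggregateFunctionStateData>;

/// Analogue of FieldVisitorToString: one overload per alternative.
struct FieldToString
{
    std::string operator()(const Null &) const { return "NULL"; }
    std::string operator()(uint64_t x) const { return std::to_string(x); }
    std::string operator()(int64_t x) const { return std::to_string(x); }
    std::string operator()(double x) const { return std::to_string(x); }
    std::string operator()(const std::string & x) const { return "'" + x + "'"; }
    std::string operator()(const AggregateFunctionStateData & x) const
    {
        /// Mirrors the later patch: print the state's type name, then the opaque payload.
        return "(" + x.name + ")'" + x.data + "'";
    }
};

int main()
{
    std::vector<Field> fields{
        uint64_t{42},
        std::string{"hello"},
        AggregateFunctionStateData{"AggregateFunction(sum, UInt64)", "serialized-state-bytes"}};

    /// std::visit plays the role of applyVisitor(): dispatch on the runtime alternative.
    for (const auto & f : fields)
        std::cout << std::visit(FieldToString{}, f) << '\n';
}
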
@@ -121,6 +127,7 @@ String FieldVisitorToString::operator() (const DecimalField & x) cons String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UInt128 & x) const { return formatQuoted(UUID(x)); } +String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const Array & x) const { @@ -231,5 +238,13 @@ void FieldVisitorHash::operator() (const DecimalField & x) const hash.update(x); } +void FieldVisitorHash::operator() (const AggregateFunctionStateData & x) const +{ + UInt8 type = Field::Types::AggregateFunctionState; + hash.update(type); + hash.update(x.toUnderType().size()); + hash.update(x.toUnderType().data(), x.toUnderType().size()); +} + } diff --git a/dbms/src/Common/FieldVisitors.h b/dbms/src/Common/FieldVisitors.h index 19b4e583338..30aa700f80d 100644 --- a/dbms/src/Common/FieldVisitors.h +++ b/dbms/src/Common/FieldVisitors.h @@ -138,6 +138,7 @@ public: String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; + String operator() (const AggregateFunctionStateData & x) const; }; @@ -156,6 +157,7 @@ public: String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; + String operator() (const AggregateFunctionStateData & x) const; }; @@ -201,6 +203,11 @@ public: else return x.getValue() / x.getScaleMultiplier(); } + + T operator() (const AggregateFunctionStateData &) const + { + throw Exception("Cannot convert String to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); + } }; @@ -222,6 +229,7 @@ public: void operator() (const DecimalField & x) const; void operator() (const DecimalField & x) const; void operator() (const DecimalField & x) const; + void operator() (const AggregateFunctionStateData & x) const; }; @@ -321,6 +329,16 @@ public: template bool operator() (const Int64 & l, const DecimalField & r) const { return DecimalField(l, 0) == r; } template bool operator() (const Float64 & l, const DecimalField & r) const { return cantCompare(l, r); } + template + bool operator() (const AggregateFunctionStateData & l, const T & r) const + { + if constexpr (std::is_same_v) + return l == r; + if constexpr (std::is_same_v) + return stringToUUID(l.toUnderType()) == r; + return cantCompare(l, r); + } + private: template bool cantCompare(const T &, const U &) const @@ -419,6 +437,16 @@ public: template bool operator() (const Int64 & l, const DecimalField & r) const { return DecimalField(l, 0) < r; } template bool operator() (const Float64 &, const DecimalField &) const { return false; } + template + bool operator() (const AggregateFunctionStateData & l, const T & r) const + { + if constexpr (std::is_same_v) + return l < r; + if constexpr (std::is_same_v) + return stringToUUID(l.toUnderType()) < r; + return cantCompare(l, r); + } + private: template bool cantCompare(const T &, const U &) const @@ -447,6 +475,7 @@ public: bool operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } bool operator() (UInt128 &) const { throw Exception("Cannot sum UUIDs", ErrorCodes::LOGICAL_ERROR); 
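
A side note on FieldVisitorHash above: it feeds the state's length into the hash before its bytes. The toy program below (FNV-1a standing in for the SipHash update() interface used in the patch) shows why the length prefix matters once several variable-length parts go into one hash: without it, the pairs ("ab", "c") and ("a", "bc") produce identical byte streams.

#include <cstdint>
#include <iostream>
#include <string>

/// Toy incremental hash (FNV-1a); only the update() interface matters for the illustration.
struct Hasher
{
    uint64_t h = 1469598103934665603ULL;
    void update(const void * data, size_t size)
    {
        const auto * p = static_cast<const unsigned char *>(data);
        for (size_t i = 0; i < size; ++i) { h ^= p[i]; h *= 1099511628211ULL; }
    }
    template <typename T> void update(const T & x) { update(&x, sizeof(x)); }
};

uint64_t hashPair(const std::string & name, const std::string & data, bool with_lengths)
{
    Hasher hash;
    if (with_lengths) hash.update(name.size());
    hash.update(name.data(), name.size());
    if (with_lengths) hash.update(data.size());
    hash.update(data.data(), data.size());
    return hash.h;
}

int main()
{
    /// Without length prefixes the two pairs collide; with them they differ.
    std::cout << (hashPair("ab", "c", false) == hashPair("a", "bc", false)) << '\n';  /// 1
    std::cout << (hashPair("ab", "c", true)  == hashPair("a", "bc", true))  << '\n';  /// 0
}
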
} + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot sum AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } template bool operator() (DecimalField & x) const diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp index c195652d051..7411d17efc1 100644 --- a/dbms/src/Core/Field.cpp +++ b/dbms/src/Core/Field.cpp @@ -75,6 +75,13 @@ namespace DB x.push_back(value); break; } + case Field::Types::AggregateFunctionState: + { + AggregateFunctionStateData value; + DB::readStringBinary(value, buf); + x.push_back(value); + break; + } } } } @@ -128,6 +135,11 @@ namespace DB DB::writeBinary(get(*it), buf); break; } + case Field::Types::AggregateFunctionState: + { + DB::writeStringBinary(get(*it), buf); + break; + } } } } @@ -209,6 +221,13 @@ namespace DB x.push_back(value); break; } + case Field::Types::AggregateFunctionState: + { + AggregateFunctionStateData value; + DB::readStringBinary(value, buf); + x.push_back(value); + break; + } } } } @@ -262,6 +281,11 @@ namespace DB DB::writeBinary(get(*it), buf); break; } + case Field::Types::AggregateFunctionState: + { + DB::writeStringBinary(get(*it), buf); + break; + } } } } diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 47242825f86..583b3303c56 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -30,6 +30,9 @@ using Array = std::vector; using TupleBackend = std::vector; STRONG_TYPEDEF(TupleBackend, Tuple) /// Array and Tuple are different types with equal representation inside Field. +using AggregateFunctionStateDataBackend = String; +STRONG_TYPEDEF(AggregateFunctionStateDataBackend, AggregateFunctionStateData) + template bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale); template bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale); template bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale); @@ -131,6 +134,7 @@ public: Decimal32 = 19, Decimal64 = 20, Decimal128 = 21, + AggregateFunctionState = 22, }; static const int MIN_NON_POD = 16; @@ -151,6 +155,7 @@ public: case Decimal32: return "Decimal32"; case Decimal64: return "Decimal64"; case Decimal128: return "Decimal128"; + case AggregateFunctionState: return "AggregateFunctionState"; } throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -325,6 +330,7 @@ public: case Types::Decimal32: return get>() < rhs.get>(); case Types::Decimal64: return get>() < rhs.get>(); case Types::Decimal128: return get>() < rhs.get>(); + case Types::AggregateFunctionState: return get() < rhs.get(); } throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -356,6 +362,7 @@ public: case Types::Decimal32: return get>() <= rhs.get>(); case Types::Decimal64: return get>() <= rhs.get>(); case Types::Decimal128: return get>() <= rhs.get>(); + case Types::AggregateFunctionState: return get() <= rhs.get(); } throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -385,6 +392,7 @@ public: case Types::Decimal32: return get>() == rhs.get>(); case Types::Decimal64: return get>() == rhs.get>(); case Types::Decimal128: return get>() == rhs.get>(); + case Types::AggregateFunctionState: return get() == rhs.get(); } throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -398,7 +406,7 @@ public: private: std::aligned_union_t, DecimalField, DecimalField + DecimalField, DecimalField, DecimalField, AggregateFunctionStateData > storage; Types::Which which; @@ -449,6 +457,7 @@ private: case Types::Decimal32: f(field.template get>()); return; case Types::Decimal64: f(field.template 
get>()); return; case Types::Decimal128: f(field.template get>()); return; + case Types::AggregateFunctionState: f(field.template get()); return; } } @@ -501,6 +510,9 @@ private: case Types::Tuple: destroy(); break; + case Types::AggregateFunctionState: + destroy(); + break; default: break; } @@ -531,6 +543,7 @@ template <> struct Field::TypeToEnum { static const Types::Which value template <> struct Field::TypeToEnum>{ static const Types::Which value = Types::Decimal32; }; template <> struct Field::TypeToEnum>{ static const Types::Which value = Types::Decimal64; }; template <> struct Field::TypeToEnum>{ static const Types::Which value = Types::Decimal128; }; +template <> struct Field::TypeToEnum{ static const Types::Which value = Types::AggregateFunctionState; }; template <> struct Field::EnumToType { using Type = Null; }; template <> struct Field::EnumToType { using Type = UInt64; }; @@ -544,6 +557,7 @@ template <> struct Field::EnumToType { using Type = Tuple template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; template <> struct Field::EnumToType { using Type = DecimalField; }; +template <> struct Field::EnumToType { using Type = DecimalField; }; template @@ -616,6 +630,8 @@ template <> struct NearestFieldTypeImpl { using Type = Tuple; }; template <> struct NearestFieldTypeImpl { using Type = UInt64; }; template <> struct NearestFieldTypeImpl { using Type = Null; }; +template <> struct NearestFieldTypeImpl { using Type = AggregateFunctionStateData; }; + template using NearestFieldType = typename NearestFieldTypeImpl::Type; From 1de1192aa8514feb4809813976a4183cafc5b5c0 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Feb 2019 17:19:09 +0300 Subject: [PATCH 11/79] move asterisks extraction to TranslateQualifiedNamesVisitor (before column names normalisation) --- dbms/src/Interpreters/ActionsVisitor.cpp | 1 + .../PredicateExpressionsOptimizer.cpp | 6 +- dbms/src/Interpreters/QueryNormalizer.cpp | 115 --------------- dbms/src/Interpreters/QueryNormalizer.h | 26 +--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 69 ++++----- .../TranslateQualifiedNamesVisitor.cpp | 134 +++++++++++++++++- .../TranslateQualifiedNamesVisitor.h | 25 +++- 7 files changed, 185 insertions(+), 191 deletions(-) diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index e7688903db8..0dba4f6a163 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -35,6 +35,7 @@ #include #include #include +#include namespace DB { diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index b49f02a14fa..930295a3b5a 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -338,9 +338,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - std::vector tables_with_columns; - TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns); - TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns}; + /// TODO: get tables from evaluateAsterisk instead of tablesOnly() to extract asterisks in general way + std::vector tables_with_columns = TranslateQualifiedNamesVisitor::Data::tablesOnly(tables); + TranslateQualifiedNamesVisitor::Data 
qn_visitor_data({}, tables_with_columns, false); TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 1a0f5bb5ac8..97d4d402f26 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -7,12 +7,10 @@ #include #include #include -#include #include #include #include #include -#include #include namespace DB @@ -91,13 +89,6 @@ void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) /// and on all remote servers, function implementation will be same. if (endsWith(func_name, "Distinct") && func_name_lowercase == "countdistinct") func_name = data.settings.count_distinct_implementation; - - /// As special case, treat count(*) as count(), not as count(list of all columns). - if (func_name_lowercase == "count" && func_arguments->children.size() == 1 - && typeid_cast(func_arguments->children[0].get())) - { - func_arguments->children.clear(); - } } } @@ -138,84 +129,6 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -/// Replace *, alias.*, database.table.* with a list of columns. -void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) -{ - if (!data.tables_with_columns) - return; - - const auto & tables_with_columns = *data.tables_with_columns; - const auto & source_columns_set = data.source_columns_set; - - ASTs old_children; - if (data.processAsterisks()) - { - bool has_asterisk = false; - for (const auto & child : node.children) - { - if (typeid_cast(child.get()) || - typeid_cast(child.get())) - { - has_asterisk = true; - break; - } - } - - if (has_asterisk) - { - old_children.swap(node.children); - node.children.reserve(old_children.size()); - } - } - - for (const auto & child : old_children) - { - if (typeid_cast(child.get())) - { - bool first_table = true; - for (const auto & [table_name, table_columns] : tables_with_columns) - { - for (const auto & column_name : table_columns) - if (first_table || !data.join_using_columns.count(column_name)) - { - /// qualifed names for duplicates - if (!first_table && source_columns_set && source_columns_set->count(column_name)) - node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); - else - node.children.emplace_back(std::make_shared(column_name)); - } - - first_table = false; - } - } - else if (const auto * qualified_asterisk = typeid_cast(child.get())) - { - DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]); - - bool first_table = true; - for (const auto & [table_name, table_columns] : tables_with_columns) - { - if (ident_db_and_name.satisfies(table_name, true)) - { - for (const auto & column_name : table_columns) - { - /// qualifed names for duplicates - if (!first_table && source_columns_set && source_columns_set->count(column_name)) - node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); - else - node.children.emplace_back(std::make_shared(column_name)); - } - break; - } - - first_table = false; - } - } - else - node.children.emplace_back(child); - } -} - /// mark table identifiers as 'not columns' void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data &) { @@ -229,9 +142,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & /// special visitChildren() for ASTSelectQuery void 
QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) { - if (auto join = select.join()) - extractJoinUsingColumns(join->table_join, data); - for (auto & child : ast->children) { if (typeid_cast(child.get()) || @@ -312,8 +222,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node, ast, data); if (auto * node = typeid_cast(ast.get())) visit(*node, ast, data); - if (auto * node = typeid_cast(ast.get())) - visit(*node, ast, data); if (auto * node = typeid_cast(ast.get())) visit(*node, ast, data); if (auto * node = typeid_cast(ast.get())) @@ -344,27 +252,4 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) } } -/// 'select * from a join b using id' should result one 'id' column -void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data) -{ - const auto & table_join = typeid_cast(*ast); - - if (table_join.using_expression_list) - { - auto & keys = typeid_cast(*table_join.using_expression_list); - for (const auto & key : keys.children) - if (auto opt_column = getIdentifierName(key)) - data.join_using_columns.insert(*opt_column); - else if (typeid_cast(key.get())) - data.join_using_columns.insert(key->getColumnName()); - else - { - String alias = key->tryGetAlias(); - if (alias.empty()) - throw Exception("Logical error: expected identifier or alias, got: " + key->getID(), ErrorCodes::LOGICAL_ERROR); - data.join_using_columns.insert(alias); - } - } -} - } diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 517f9416959..6d6fea86e44 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -1,11 +1,9 @@ #pragma once -#include #include #include #include -#include #include namespace DB @@ -21,9 +19,9 @@ inline bool functionIsInOrGlobalInOperator(const String & name) return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn"; } +class ASTSelectQuery; class ASTFunction; class ASTIdentifier; -class ASTExpressionList; struct ASTTablesInSelectQueryElement; class Context; @@ -53,10 +51,6 @@ public: const Aliases & aliases; const ExtractedSettings settings; - const Context * context; - const NameSet * source_columns_set; - const std::vector * tables_with_columns; - std::unordered_set join_using_columns; /// tmp data size_t level; @@ -64,26 +58,11 @@ public: SetOfASTs current_asts; /// vertices in the current call stack of this method std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) - Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_, - const NameSet & source_columns_set, const std::vector & tables_with_columns_) - : aliases(aliases_) - , settings(settings_) - , context(&context_) - , source_columns_set(&source_columns_set) - , tables_with_columns(&tables_with_columns_) - , level(0) - {} - Data(const Aliases & aliases_, ExtractedSettings && settings_) : aliases(aliases_) , settings(settings_) - , context(nullptr) - , source_columns_set(nullptr) - , tables_with_columns(nullptr) , level(0) {} - - bool processAsterisks() const { return tables_with_columns && !tables_with_columns->empty(); } }; QueryNormalizer(Data & data) @@ -102,13 +81,10 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTFunction &, const ASTPtr &, Data &); - static void visit(ASTExpressionList &, const ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery 
&, const ASTPtr &, Data &); static void visitChildren(const ASTPtr &, Data & data); - - static void extractJoinUsingColumns(const ASTPtr ast, Data & data); }; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index b4dc9a31211..33cb2da0515 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -78,49 +78,36 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam } } -/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. -void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, - const std::vector & tables_with_columns) +/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. +/// Expand asterisks and qualified asterisks with column names. +/// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer. +void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const Context & context, SyntaxAnalyzerResult & result, + const Names & source_columns_list, const NameSet & source_columns_set) { - if (!select_query->tables || select_query->tables->children.empty()) - return; + std::vector tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context); + + if (tables_with_columns.empty()) + { + Names all_columns_name = source_columns_list; + + /// TODO: asterisk_left_columns_only probably does not work in some cases + if (!context.getSettingsRef().asterisk_left_columns_only) + { + auto columns_from_joined_table = result.analyzed_join.getColumnsFromJoinedTable(source_columns_set, context, select_query); + for (auto & column : columns_from_joined_table) + all_columns_name.emplace_back(column.name_and_type.name); + } + + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); + } LogAST log; - TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns}; + TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns); TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } -/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases. 
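
The expansion rule being moved out of QueryNormalizer is easier to see without the AST plumbing. Below is a standalone sketch over plain strings (hypothetical TableWithColumns type, and a simple seen-set instead of the source_columns check used in the real code): the first table contributes every column, later tables skip the USING columns, and a later-table column whose name was already emitted gets a table qualifier.

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct TableWithColumns
{
    std::string alias;                  /// qualifier prefix, e.g. "t2"
    std::vector<std::string> columns;
};

/// Expand `SELECT *` over the joined tables, mirroring visit(ASTExpressionList &).
std::vector<std::string> expandAsterisk(
    const std::vector<TableWithColumns> & tables,
    const std::set<std::string> & join_using_columns)
{
    std::vector<std::string> result;
    std::set<std::string> seen;
    bool first_table = true;
    for (const auto & table : tables)
    {
        for (const auto & column : table.columns)
        {
            if (!first_table && join_using_columns.count(column))
                continue;                                        /// USING columns come only from the left table
            if (!first_table && seen.count(column))
                result.push_back(table.alias + "." + column);    /// qualify duplicated names
            else
                result.push_back(column);
            seen.insert(column);
        }
        first_table = false;
    }
    return result;
}

int main()
{
    /// SELECT * FROM t1 JOIN t2 USING id
    auto cols = expandAsterisk({{"t1", {"id", "x"}}, {"t2", {"id", "x", "y"}}}, {"id"});
    for (const auto & c : cols)
        std::cout << c << '\n';   /// id, x, t2.x, y
}
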
-void normalizeTree( - ASTPtr & query, - SyntaxAnalyzerResult & result, - const Names & source_columns, - const NameSet & source_columns_set, - const Context & context, - const ASTSelectQuery * select_query, - std::vector & tables_with_columns) -{ - const auto & settings = context.getSettingsRef(); - Names all_columns_name = source_columns; - - if (!settings.asterisk_left_columns_only) - { - auto columns_from_joined_table = result.analyzed_join.getColumnsFromJoinedTable(source_columns_set, context, select_query); - for (auto & column : columns_from_joined_table) - all_columns_name.emplace_back(column.name_and_type.name); - } - - if (all_columns_name.empty()) - throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - - if (tables_with_columns.empty()) - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - - QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns); - QueryNormalizer(normalizer_data).visit(query); -} bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -646,12 +633,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (source_columns_set.size() != source_columns_list.size()) throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); - std::vector tables_with_columns; - if (select_query) { - tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context); - translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns); + translateQualifiedNames(query, select_query, context, result, + (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set); /// Depending on the user's profile, check for the execution rights /// distributed subqueries inside the IN or JOIN sections and process these subqueries. @@ -669,8 +654,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( } /// Common subexpression elimination. Rewrite rules. - normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, - context, select_query, tables_with_columns); + { + QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef()); + QueryNormalizer(normalizer_data).visit(query); + } /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. 
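
Both TranslateQualifiedNamesVisitor and QueryNormalizer follow the same matcher-plus-walker pattern: an InDepthNodeVisitor parameterized by a matcher whose visit overloads dispatch on node type via typeid_cast. Here is a toy version of that pattern with a hypothetical node hierarchy, not ClickHouse's IAST; the real matcher also returns which children should be traversed, which is omitted in this sketch.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// A hypothetical, minimal AST: just enough to show the dispatch pattern.
struct Node
{
    virtual ~Node() = default;
    std::vector<std::shared_ptr<Node>> children;
};
struct Identifier : Node { std::string name; };
struct Asterisk : Node {};

/// "Matcher": per-node-type handlers selected with dynamic casts,
/// the way TranslateQualifiedNamesMatcher uses typeid_cast over IAST.
struct CollectNamesMatcher
{
    struct Data { std::vector<std::string> names; size_t asterisks = 0; };

    static void visit(Node & node, Data & data)
    {
        if (auto * id = dynamic_cast<Identifier *>(&node))
            data.names.push_back(id->name);
        else if (dynamic_cast<Asterisk *>(&node))
            ++data.asterisks;
    }
};

/// "InDepthNodeVisitor": generic depth-first traversal parameterized by a matcher.
template <typename Matcher>
struct InDepthVisitor
{
    typename Matcher::Data & data;

    void visit(Node & node)
    {
        Matcher::visit(node, data);
        for (auto & child : node.children)
            if (child)
                visit(*child);
    }
};

int main()
{
    auto root = std::make_shared<Node>();
    auto id = std::make_shared<Identifier>();
    id->name = "x";
    root->children = {std::make_shared<Asterisk>(), id};

    CollectNamesMatcher::Data data;
    InDepthVisitor<CollectNamesMatcher>{data}.visit(*root);
    std::cout << data.asterisks << " asterisk(s), first identifier: " << data.names.at(0) << '\n';
}
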
diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index f6e5ebe956a..febc9753366 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -5,10 +7,14 @@ #include #include +#include #include #include #include #include +#include +#include +#include namespace DB @@ -41,12 +47,14 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & { if (auto * t = typeid_cast(ast.get())) return visit(*t, ast, data); - if (auto * t = typeid_cast(ast.get())) - return visit(*t, ast, data); if (auto * t = typeid_cast(ast.get())) return visit(*t, ast, data); if (auto * t = typeid_cast(ast.get())) return visit(*t, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); return {}; } @@ -73,6 +81,18 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & iden return {}; } +/// As special case, treat count(*) as count(), not as count(list of all columns). +void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, Data &) +{ + ASTPtr & func_arguments = node.arguments; + + String func_name_lowercase = Poco::toLower(node.name); + if (func_name_lowercase == "count" && + func_arguments->children.size() == 1 && + typeid_cast(func_arguments->children[0].get())) + func_arguments->children.clear(); +} + std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) { if (ast->children.size() != 1) @@ -100,8 +120,11 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, return out; } -std::vector TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data &) +std::vector TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data & data) { + if (auto join = select.join()) + extractJoinUsingColumns(join->table_join, data); + /// If the WHERE clause or HAVING consists of a single qualified column, the reference must be translated not only in children, /// but also in where_expression and having_expression. std::vector out; @@ -114,4 +137,109 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & sel return out; } +/// Replace *, alias.*, database.table.* with a list of columns. 
+void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPtr &, Data & data) +{ + const auto & tables_with_columns = data.tables; + const auto & source_columns = data.source_columns; + + ASTs old_children; + if (data.processAsterisks()) + { + bool has_asterisk = false; + for (const auto & child : node.children) + { + if (typeid_cast(child.get())) + { + if (tables_with_columns.empty()) + throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); + has_asterisk = true; + break; + } + else if (auto qa = typeid_cast(child.get())) + { + visit(*qa, child, data); /// check if it's OK before rewrite + has_asterisk = true; + break; + } + } + + if (has_asterisk) + { + old_children.swap(node.children); + node.children.reserve(old_children.size()); + } + } + + for (const auto & child : old_children) + { + if (typeid_cast(child.get())) + { + bool first_table = true; + for (const auto & [table_name, table_columns] : tables_with_columns) + { + for (const auto & column_name : table_columns) + if (first_table || !data.join_using_columns.count(column_name)) + { + /// qualifed names for duplicates + if (!first_table && source_columns.count(column_name)) + node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); + else + node.children.emplace_back(std::make_shared(column_name)); + } + + first_table = false; + } + } + else if (const auto * qualified_asterisk = typeid_cast(child.get())) + { + DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]); + + bool first_table = true; + for (const auto & [table_name, table_columns] : tables_with_columns) + { + if (ident_db_and_name.satisfies(table_name, true)) + { + for (const auto & column_name : table_columns) + { + /// qualifed names for duplicates + if (!first_table && source_columns.count(column_name)) + node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); + else + node.children.emplace_back(std::make_shared(column_name)); + } + break; + } + + first_table = false; + } + } + else + node.children.emplace_back(child); + } +} + +/// 'select * from a join b using id' should result one 'id' column +void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(const ASTPtr ast, Data & data) +{ + const auto & table_join = typeid_cast(*ast); + + if (table_join.using_expression_list) + { + auto & keys = typeid_cast(*table_join.using_expression_list); + for (const auto & key : keys.children) + if (auto opt_column = getIdentifierName(key)) + data.join_using_columns.insert(*opt_column); + else if (typeid_cast(key.get())) + data.join_using_columns.insert(key->getColumnName()); + else + { + String alias = key->tryGetAlias(); + if (alias.empty()) + throw Exception("Logical error: expected identifier or alias, got: " + key->getID(), ErrorCodes::LOGICAL_ERROR); + data.join_using_columns.insert(alias); + } + } +} + } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index bee5e7022f4..15dd6e5192f 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -13,6 +13,8 @@ class ASTIdentifier; class ASTQualifiedAsterisk; struct ASTTableJoin; class ASTSelectQuery; +class ASTExpressionList; +class ASTFunction; /// Visit one node for names qualification. @sa InDepthNodeVisitor. 
class TranslateQualifiedNamesMatcher @@ -22,15 +24,26 @@ public: { const NameSet & source_columns; const std::vector & tables; + std::unordered_set join_using_columns; + bool has_columns; - static void setTablesOnly(const std::vector & tables, - std::vector & tables_with_columns) + Data(const NameSet & source_columns_, const std::vector & tables_, bool has_columns_ = true) + : source_columns(source_columns_) + , tables(tables_) + , has_columns(has_columns_) + {} + + static std::vector tablesOnly(const std::vector & tables) { - tables_with_columns.clear(); + std::vector tables_with_columns; tables_with_columns.reserve(tables.size()); + for (const auto & table : tables) tables_with_columns.emplace_back(TableWithColumnNames{table, {}}); + return tables_with_columns; } + + bool processAsterisks() const { return !tables.empty() && has_columns; } }; static constexpr const char * label = "TranslateQualifiedNames"; @@ -43,10 +56,14 @@ private: static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); static std::vector visit(ASTTableJoin & node, const ASTPtr & ast, Data &); static std::vector visit(ASTSelectQuery & node, const ASTPtr & ast, Data &); + static void visit(ASTExpressionList &, const ASTPtr &, Data &); + static void visit(ASTFunction &, const ASTPtr &, Data &); + + static void extractJoinUsingColumns(const ASTPtr ast, Data & data); }; /// Visits AST for names qualification. -/// It finds columns (general identifiers and asterisks) and translate their names according to tables' names. +/// It finds columns and translate their names to the normal form. Expand asterisks and qualified asterisks with column names. using TranslateQualifiedNamesVisitor = InDepthNodeVisitor; } From c35439a5f013c997e0f7cb8171dd0c3d9a4752c8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 11 Feb 2019 16:11:52 +0300 Subject: [PATCH 12/79] Updated FieldVisitor. 
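
The commit below makes the serialized state carry its full type name, for example AggregateFunction(sum, UInt64), so that ColumnAggregateFunction::insert can reject a state produced by a different aggregate function. Reduced to its essence with hypothetical standalone types (this is not the real column class, which deserializes states into an arena):

#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/// A serialized aggregate state tagged with the full type name it came from.
struct AggregateFunctionStateData
{
    std::string name;   /// e.g. "AggregateFunction(sum, UInt64)"
    std::string data;   /// opaque serialized state
};

/// Hypothetical stand-in for ColumnAggregateFunction: only the type check matters here.
class AggregateStateColumn
{
public:
    explicit AggregateStateColumn(std::string type_string_) : type_string(std::move(type_string_)) {}

    void insert(const AggregateFunctionStateData & x)
    {
        /// Same guard as in the patch: refuse states serialized by a different function type.
        if (x.name != type_string)
            throw std::invalid_argument(
                "Cannot insert field with type " + x.name + " into column with type " + type_string);
        states.push_back(x.data);
    }

private:
    std::string type_string;
    std::vector<std::string> states;
};

/// Usage: the second insert throws.
///   AggregateStateColumn col("AggregateFunction(sum, UInt64)");
///   col.insert({"AggregateFunction(sum, UInt64)", "state-bytes"});   // ok
///   col.insert({"AggregateFunction(uniq, String)", "state-bytes"});  // std::invalid_argument
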
--- .../AggregateFunctionFactory.cpp | 7 +++- .../AggregateFunctions/IAggregateFunction.h | 15 ++++++++ dbms/src/Columns/ColumnAggregateFunction.cpp | 27 ++++++++++--- dbms/src/Columns/ColumnAggregateFunction.h | 2 + dbms/src/Common/FieldVisitors.cpp | 14 +++++-- dbms/src/Common/FieldVisitors.h | 19 ++++++---- dbms/src/Core/Field.cpp | 12 ++++-- dbms/src/Core/Field.h | 38 ++++++++++++++++++- dbms/src/DataTypes/FieldToDataType.cpp | 6 +++ dbms/src/DataTypes/FieldToDataType.h | 1 + dbms/src/Interpreters/convertFieldToType.cpp | 15 ++++++++ 11 files changed, 132 insertions(+), 24 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 6aeaaef2bfa..932d6615385 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -86,12 +86,17 @@ AggregateFunctionPtr AggregateFunctionFactory::get( [](const auto & type) { return type->onlyNull(); })) nested_function = getImpl(name, nested_types, parameters, recursion_level); - return combinator->transformAggregateFunction(nested_function, argument_types, parameters); + auto res = combinator->transformAggregateFunction(nested_function, type_without_low_cardinality, parameters); + res->setArguments(type_without_low_cardinality, parameters); + return res; } auto res = getImpl(name, type_without_low_cardinality, parameters, recursion_level); if (!res) throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR); + + res->setArguments(type_without_low_cardinality, parameters); + return res; } diff --git a/dbms/src/AggregateFunctions/IAggregateFunction.h b/dbms/src/AggregateFunctions/IAggregateFunction.h index 85de0e0c7ff..f5def066058 100644 --- a/dbms/src/AggregateFunctions/IAggregateFunction.h +++ b/dbms/src/AggregateFunctions/IAggregateFunction.h @@ -108,6 +108,21 @@ public: * const char * getHeaderFilePath() const override { return __FILE__; } */ virtual const char * getHeaderFilePath() const = 0; + + const DataTypes & getArgumentTypes() const { return argument_types; } + const Array & getParameters() const { return parameters; } + +private: + DataTypes argument_types; + Array parameters; + + friend class AggregateFunctionFactory; + + void setArguments(DataTypes argument_types_, Array parameters_) + { + argument_types = std::move(argument_types_); + parameters = std::move(parameters_); + } }; diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 65b168e35a9..23abee39530 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -258,11 +260,17 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const return create(func, Arenas(1, std::make_shared())); } +String ColumnAggregateFunction::getTypeString() const +{ + return DataTypeAggregateFunction(func, func->getArgumentTypes(), func->getParameters()).getName(); +} + Field ColumnAggregateFunction::operator[](size_t n) const { Field field = AggregateFunctionStateData(); + field.get().name = getTypeString(); { - WriteBufferFromString buffer(field.get().toUnderType()); + WriteBufferFromString buffer(field.get().data); func->serialize(data[n], buffer); } return field; @@ -270,9 +278,10 @@ Field ColumnAggregateFunction::operator[](size_t n) const void ColumnAggregateFunction::get(size_t n, Field & res) 
const { - res = String(); + res = AggregateFunctionStateData(); + res.get().name = getTypeString(); { - WriteBufferFromString buffer(res.get().toUnderType()); + WriteBufferFromString buffer(res.get().data); func->serialize(data[n], buffer); } } @@ -337,13 +346,18 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar } } - void ColumnAggregateFunction::insert(const Field & x) { + String type_string = getTypeString(); + auto & field_name = x.get().name; + if (type_string != field_name) + throw Exception("Cannot insert filed with type " + field_name + " into column with type " + type_string, + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().toUnderType()); + ReadBufferFromString read_buffer(x.get().data); func->deserialize(data.back(), read_buffer, &arena); } @@ -466,11 +480,12 @@ void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const AggregateDataPtr place = place_buffer.data(); AggregateFunctionStateData serialized; + serialized.name = getTypeString(); func->create(place); try { - WriteBufferFromString buffer(serialized.toUnderType()); + WriteBufferFromString buffer(serialized.data); func->serialize(place, buffer); } catch (...) diff --git a/dbms/src/Columns/ColumnAggregateFunction.h b/dbms/src/Columns/ColumnAggregateFunction.h index e44689d992e..e090cb8c110 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.h +++ b/dbms/src/Columns/ColumnAggregateFunction.h @@ -94,6 +94,8 @@ private: { } + String getTypeString() const; + public: ~ColumnAggregateFunction() override; diff --git a/dbms/src/Common/FieldVisitors.cpp b/dbms/src/Common/FieldVisitors.cpp index e10095fed90..f77977b3eed 100644 --- a/dbms/src/Common/FieldVisitors.cpp +++ b/dbms/src/Common/FieldVisitors.cpp @@ -92,7 +92,8 @@ String FieldVisitorDump::operator() (const Tuple & x_def) const String FieldVisitorDump::operator() (const AggregateFunctionStateData & x) const { WriteBufferFromOwnString wb; - writeQuoted(x, wb); + writeQuoted(x.name, wb); + writeQuoted(x.data, wb); return wb.str(); } @@ -127,7 +128,10 @@ String FieldVisitorToString::operator() (const DecimalField & x) cons String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UInt128 & x) const { return formatQuoted(UUID(x)); } -String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const { return formatQuoted(x); } +String FieldVisitorToString::operator() (const AggregateFunctionStateData & x) const +{ + return "(" + formatQuoted(x.name) + ")" + formatQuoted(x.data); +} String FieldVisitorToString::operator() (const Array & x) const { @@ -242,8 +246,10 @@ void FieldVisitorHash::operator() (const AggregateFunctionStateData & x) const { UInt8 type = Field::Types::AggregateFunctionState; hash.update(type); - hash.update(x.toUnderType().size()); - hash.update(x.toUnderType().data(), x.toUnderType().size()); + hash.update(x.name.size()); + hash.update(x.name.data(), x.name.size()); + hash.update(x.data.size()); + hash.update(x.data.data(), x.data.size()); } diff --git a/dbms/src/Common/FieldVisitors.h b/dbms/src/Common/FieldVisitors.h index 30aa700f80d..5575c607b3b 100644 --- a/dbms/src/Common/FieldVisitors.h +++ b/dbms/src/Common/FieldVisitors.h @@ -49,6 +49,7 @@ typename 
std::decay_t::ResultType applyVisitor(Visitor && visitor, F && case Field::Types::Decimal32: return visitor(field.template get>()); case Field::Types::Decimal64: return visitor(field.template get>()); case Field::Types::Decimal128: return visitor(field.template get>()); + case Field::Types::AggregateFunctionState: return visitor(field.template get()); default: throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -72,6 +73,7 @@ static typename std::decay_t::ResultType applyBinaryVisitorImpl(Visitor case Field::Types::Decimal32: return visitor(field1, field2.template get>()); case Field::Types::Decimal64: return visitor(field1, field2.template get>()); case Field::Types::Decimal128: return visitor(field1, field2.template get>()); + case Field::Types::AggregateFunctionState: return visitor(field1, field2.template get()); default: throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -116,6 +118,9 @@ typename std::decay_t::ResultType applyVisitor(Visitor && visitor, F1 & case Field::Types::Decimal128: return applyBinaryVisitorImpl( std::forward(visitor), field1.template get>(), std::forward(field2)); + case Field::Types::AggregateFunctionState: + return applyBinaryVisitorImpl( + std::forward(visitor), field1.template get(), std::forward(field2)); default: throw Exception("Bad type of Field", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -206,7 +211,7 @@ public: T operator() (const AggregateFunctionStateData &) const { - throw Exception("Cannot convert String to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); + throw Exception("Cannot convert AggregateFunctionStateData to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); } }; @@ -254,6 +259,7 @@ public: bool operator() (const UInt64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const UInt64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const UInt64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const UInt64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Null & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } @@ -263,6 +269,7 @@ public: bool operator() (const Int64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const Int64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Null & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } @@ -272,6 +279,7 @@ public: bool operator() (const Float64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const Float64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } template bool operator() (const Null &, const T &) const @@ -334,8 +342,6 @@ public: { if constexpr (std::is_same_v) return l == r; - if constexpr (std::is_same_v) - return stringToUUID(l.toUnderType()) == r; return cantCompare(l, r); } @@ -362,6 +368,7 @@ public: 
bool operator() (const UInt64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const UInt64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const UInt64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const UInt64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Null & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } @@ -371,6 +378,7 @@ public: bool operator() (const Int64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const Int64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const Int64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Null & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } @@ -380,6 +388,7 @@ public: bool operator() (const Float64 & l, const String & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Array & r) const { return cantCompare(l, r); } bool operator() (const Float64 & l, const Tuple & r) const { return cantCompare(l, r); } + bool operator() (const Float64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } template bool operator() (const Null &, const T &) const @@ -440,10 +449,6 @@ public: template bool operator() (const AggregateFunctionStateData & l, const T & r) const { - if constexpr (std::is_same_v) - return l < r; - if constexpr (std::is_same_v) - return stringToUUID(l.toUnderType()) < r; return cantCompare(l, r); } diff --git a/dbms/src/Core/Field.cpp b/dbms/src/Core/Field.cpp index 7411d17efc1..fbe436a76fe 100644 --- a/dbms/src/Core/Field.cpp +++ b/dbms/src/Core/Field.cpp @@ -78,7 +78,8 @@ namespace DB case Field::Types::AggregateFunctionState: { AggregateFunctionStateData value; - DB::readStringBinary(value, buf); + DB::readStringBinary(value.name, buf); + DB::readStringBinary(value.data, buf); x.push_back(value); break; } @@ -137,7 +138,8 @@ namespace DB } case Field::Types::AggregateFunctionState: { - DB::writeStringBinary(get(*it), buf); + DB::writeStringBinary(it->get().name, buf); + DB::writeStringBinary(it->get().data, buf); break; } } @@ -224,7 +226,8 @@ namespace DB case Field::Types::AggregateFunctionState: { AggregateFunctionStateData value; - DB::readStringBinary(value, buf); + DB::readStringBinary(value.name, buf); + DB::readStringBinary(value.data, buf); x.push_back(value); break; } @@ -283,7 +286,8 @@ namespace DB } case Field::Types::AggregateFunctionState: { - DB::writeStringBinary(get(*it), buf); + DB::writeStringBinary(it->get().name, buf); + DB::writeStringBinary(it->get().data, buf); break; } } diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 583b3303c56..7afa1395710 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int BAD_GET; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } class Field; @@ -30,8 +31,40 @@ using Array = std::vector; using TupleBackend = std::vector; STRONG_TYPEDEF(TupleBackend, Tuple) /// Array and Tuple are different types with equal representation inside Field. 
-using AggregateFunctionStateDataBackend = String; -STRONG_TYPEDEF(AggregateFunctionStateDataBackend, AggregateFunctionStateData) +struct AggregateFunctionStateData +{ + String name; /// Name with arguments. + String data; + + bool operator < (const AggregateFunctionStateData &) const + { + throw Exception("Operator < is not implemented for AggregateFunctionStateData.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + bool operator <= (const AggregateFunctionStateData &) const + { + throw Exception("Operator <= is not implemented for AggregateFunctionStateData.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + bool operator > (const AggregateFunctionStateData &) const + { + throw Exception("Operator > is not implemented for AggregateFunctionStateData.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + bool operator >= (const AggregateFunctionStateData &) const + { + throw Exception("Operator >= is not implemented for AggregateFunctionStateData.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + bool operator == (const AggregateFunctionStateData & rhs) const + { + if (name != rhs.name) + throw Exception("Comparing aggregate functions with different types: " + name + " and " + rhs.name, + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return data == rhs.data; + } +}; template bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale); template bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale); @@ -587,6 +620,7 @@ T safeGet(Field & field) template <> struct TypeName { static std::string get() { return "Array"; } }; template <> struct TypeName { static std::string get() { return "Tuple"; } }; +template <> struct TypeName { static std::string get() { return "AggregateFunctionState"; } }; template struct NearestFieldTypeImpl; diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp index 18fa0e97b88..70fab533838 100644 --- a/dbms/src/DataTypes/FieldToDataType.cpp +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -104,5 +105,10 @@ DataTypePtr FieldToDataType::operator() (const Tuple & x) const return std::make_shared(element_types); } +DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const +{ + auto & name = static_cast(x).name; + return DataTypeFactory::instance().get(name); +} } diff --git a/dbms/src/DataTypes/FieldToDataType.h b/dbms/src/DataTypes/FieldToDataType.h index dc103e24641..1edcdf3c11d 100644 --- a/dbms/src/DataTypes/FieldToDataType.h +++ b/dbms/src/DataTypes/FieldToDataType.h @@ -28,6 +28,7 @@ public: DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const DecimalField & x) const; + DataTypePtr operator() (const AggregateFunctionStateData & x) const; }; } diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 892a5967719..378cd7db6b8 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -22,6 +22,7 @@ #include #include +#include namespace DB @@ -248,6 +249,18 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return res; } } + else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast(&type)) + { + if (src.getType() != Field::Types::AggregateFunctionState) + throw Exception(String("Cannot convert ") + src.getTypeName() + " to " + agg_func_type->getName(), + ErrorCodes::TYPE_MISMATCH); + + auto & name = 
src.get().name; + if (agg_func_type->getName() != name) + throw Exception("Cannot convert " + name + " to " + agg_func_type->getName(), ErrorCodes::TYPE_MISMATCH); + + return src; + } if (src.getType() == Field::Types::String) { @@ -257,6 +270,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return (*col)[0]; } + + // TODO (nemkov): should we attempt to parse value using or `type.deserializeAsTextEscaped()` type.deserializeAsTextEscaped() ? throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " + Field::Types::toString(src.getType()), ErrorCodes::TYPE_MISMATCH); From d4ba5432b30ba8a8b6f091718f62531da6afb6bd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 11 Feb 2019 18:45:14 +0300 Subject: [PATCH 13/79] Added test. --- .../00905_field_with_aggregate_function_state.reference | 3 +++ .../0_stateless/00905_field_with_aggregate_function_state.sql | 4 ++++ 2 files changed, 7 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.reference create mode 100644 dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.sql diff --git a/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.reference b/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.reference new file mode 100644 index 00000000000..6d6b7b02313 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.reference @@ -0,0 +1,3 @@ +1 +45 +4.5 diff --git a/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.sql b/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.sql new file mode 100644 index 00000000000..b0470ac9992 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00905_field_with_aggregate_function_state.sql @@ -0,0 +1,4 @@ +with (select sumState(1)) as s select sumMerge(s); +with (select sumState(number) from (select * from system.numbers limit 10)) as s select sumMerge(s); +with (select quantileState(0.5)(number) from (select * from system.numbers limit 10)) as s select quantileMerge(s); + From 0a6f75a1b67c41de40238f73a306a2471e8699f8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 11 Feb 2019 18:50:52 +0300 Subject: [PATCH 14/79] Allow to execute subquery with scalar aggregate function state. 
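Since a Field can now hold an aggregate function state together with its type name, the special case that rejected such states in scalar subqueries is no longer needed: the value read from the result column is turned into a literal like any other scalar, and the test added in the previous commit (with (select sumState(1)) as s select sumMerge(s)) exercises exactly that path. A small sketch (illustration only; the helper name is made up) of how such a literal gets its type back through FieldToDataType:

    /// Illustration: maps the stored type name back to a data type, mirroring
    /// FieldToDataType::operator()(const AggregateFunctionStateData &) from the
    /// "Updated FieldVisitor" commit.
    DataTypePtr typeOfStateLiteral(const Field & field)
    {
        const auto & state = field.get<AggregateFunctionStateData>();
        return DataTypeFactory::instance().get(state.name);  /// e.g. "AggregateFunction(sum, UInt8)"
    }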
--- .../Interpreters/ExecuteScalarSubqueriesVisitor.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index b09ad5fb36e..cec1fa90962 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -109,11 +109,6 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr size_t columns = block.columns(); if (columns == 1) { - if (typeid_cast(block.safeGetByPosition(0).type.get())) - { - throw Exception("Scalar subquery can't return an aggregate function state", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); - } - auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery.alias; lit->prefer_alias_to_column_name = subquery.prefer_alias_to_column_name; @@ -132,11 +127,6 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) { - if (typeid_cast(block.safeGetByPosition(i).type.get())) - { - throw Exception("Scalar subquery can't return an aggregate function state", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); - } - exp_list->children[i] = addTypeConversion( std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); From 958b538a0135714543fda7ac6d7a372e7f814119 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Feb 2019 21:26:40 +0300 Subject: [PATCH 15/79] minor improvement --- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 3c10f7da1b5..3c17f9d4684 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -56,14 +56,16 @@ std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const AST { visitOther(ast, data); - /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. - /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). - std::vector out; + std::vector grand_children; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - out.push_back(&child3); - return out; + grand_children.push_back(child3); + + /// create own visitor to run bottom to top + for (auto & child : grand_children) + QueryAliasesVisitor(data).visit(child); + return {}; } /// set unique aliases for all subqueries. 
this is needed, because: From 805866e6b3cd38e8b1e4db3155e7aa4520e58087 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Feb 2019 22:14:57 +0300 Subject: [PATCH 16/79] hotfix for wrong aliases issue-4110 --- dbms/src/Interpreters/IdentifierSemantic.cpp | 9 ++++++++- dbms/src/Interpreters/IdentifierSemantic.h | 2 ++ dbms/src/Interpreters/QueryNormalizer.cpp | 2 +- .../queries/0_stateless/00818_alias_bug_4110.reference | 5 +++++ dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql | 5 +++++ 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference create mode 100644 dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 13a9c49c3e0..41dc565811a 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -37,12 +37,17 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) return {}; } - void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value) { identifier.semantic->need_long_name = value; } +bool IdentifierSemantic::canBeAlias(const ASTIdentifier & identifier) +{ + return identifier.semantic->can_be_alias; +} + + std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) @@ -108,6 +113,8 @@ void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const D size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); setColumnShortName(identifier, match); + if (match) + identifier.semantic->can_be_alias = false; if (identifier.semantic->need_long_name) { diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index be721627e1a..4318142c146 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -10,6 +10,7 @@ struct IdentifierSemanticImpl { bool special = false; bool need_long_name = false; + bool can_be_alias = true; }; /// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier @@ -28,6 +29,7 @@ struct IdentifierSemantic static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name + static bool canBeAlias(const ASTIdentifier & identifier); private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 97d4d402f26..936c99afc51 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -102,7 +102,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). 
auto it_alias = data.aliases.find(node.name); - if (it_alias != data.aliases.end() && current_alias != node.name) + if (IdentifierSemantic::canBeAlias(node) && it_alias != data.aliases.end() && current_alias != node.name) { auto & alias_node = it_alias->second; diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference new file mode 100644 index 00000000000..204377075a9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference @@ -0,0 +1,5 @@ +10 11 +11 10 +11 11 +11 12 +12 11 diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql new file mode 100644 index 00000000000..b1cfa3780a2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql @@ -0,0 +1,5 @@ +select s.a as a, s.a + 1 as b from (select 10 as a) s; +select s.a + 1 as a, s.a as b from (select 10 as a) s; +select s.a + 1 as a, s.a + 1 as b from (select 10 as a) s; +select s.a + 1 as b, s.a + 2 as a from (select 10 as a) s; +select s.a + 2 as b, s.a + 1 as a from (select 10 as a) s; From 2b8b342ccd3b6030d02ba16884112b812f2f5f3f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 11 Feb 2019 22:26:32 +0300 Subject: [PATCH 17/79] Update IAggregateFunction interface. --- .../AggregateFunctionArgMinMax.h | 7 ++-- .../AggregateFunctionArray.h | 3 +- .../AggregateFunctions/AggregateFunctionAvg.h | 3 +- .../AggregateFunctionBitwise.cpp | 2 +- .../AggregateFunctionBitwise.h | 3 ++ .../AggregateFunctionBoundingRatio.h | 1 + .../AggregateFunctionCount.cpp | 4 +- .../AggregateFunctionCount.h | 8 +++- .../AggregateFunctionEntropy.cpp | 4 +- .../AggregateFunctionEntropy.h | 4 +- .../AggregateFunctionFactory.cpp | 7 +--- .../AggregateFunctionForEach.h | 3 +- .../AggregateFunctionGroupArray.h | 5 ++- .../AggregateFunctionGroupArrayInsertAt.cpp | 4 ++ .../AggregateFunctionGroupArrayInsertAt.h | 9 ++--- .../AggregateFunctionGroupUniqArray.cpp | 8 +++- .../AggregateFunctionGroupUniqArray.h | 8 +++- .../AggregateFunctionHistogram.cpp | 2 +- .../AggregateFunctionHistogram.h | 5 ++- .../AggregateFunctions/AggregateFunctionIf.h | 3 +- .../AggregateFunctionMaxIntersections.h | 2 +- .../AggregateFunctionMerge.cpp | 2 +- .../AggregateFunctionMerge.h | 9 +++-- .../AggregateFunctionMinMaxAny.h | 6 ++- .../AggregateFunctionNothing.h | 3 ++ .../AggregateFunctionNull.cpp | 16 ++++---- .../AggregateFunctionNull.h | 12 +++--- .../AggregateFunctionQuantile.h | 5 ++- .../AggregateFunctionRetention.h | 1 + .../AggregateFunctionSequenceMatch.cpp | 4 +- .../AggregateFunctionSequenceMatch.h | 11 +++++- .../AggregateFunctionState.h | 3 +- .../AggregateFunctionStatistics.cpp | 4 +- .../AggregateFunctionStatistics.h | 7 ++++ .../AggregateFunctionStatisticsSimple.h | 10 +++-- .../AggregateFunctionSum.cpp | 4 +- .../AggregateFunctions/AggregateFunctionSum.h | 10 +++-- .../AggregateFunctionSumMap.cpp | 4 +- .../AggregateFunctionSumMap.h | 17 ++++++--- .../AggregateFunctionTopK.cpp | 14 +++---- .../AggregateFunctionTopK.h | 13 ++++--- .../AggregateFunctionUniq.cpp | 18 ++++----- .../AggregateFunctionUniq.h | 4 ++ .../AggregateFunctionUniqCombined.cpp | 38 +++++++++---------- .../AggregateFunctionUniqCombined.h | 7 +++- .../AggregateFunctionUniqUpTo.cpp | 18 ++++----- .../AggregateFunctionUniqUpTo.h | 10 +++-- .../AggregateFunctionWindowFunnel.h | 1 + .../AggregateFunctionsStatisticsSimple.cpp | 6 +-- .../AggregateFunctions/IAggregateFunction.h | 19 +++++----- 50 files changed, 
220 insertions(+), 151 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 9a232e2e77d..42649be78fd 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -31,12 +31,13 @@ template class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper> { private: - DataTypePtr type_res; - DataTypePtr type_val; + const DataTypePtr & type_res; + const DataTypePtr & type_val; public: AggregateFunctionArgMinMax(const DataTypePtr & type_res, const DataTypePtr & type_val) - : type_res(type_res), type_val(type_val) + : IAggregateFunctionDataHelper>({type_res, type_val}, {}), + type_res(argument_types[0]), type_val(argument_types[1]) { if (!type_val->isComparable()) throw Exception("Illegal type " + type_val->getName() + " of second argument of aggregate function " + getName() diff --git a/dbms/src/AggregateFunctions/AggregateFunctionArray.h b/dbms/src/AggregateFunctions/AggregateFunctionArray.h index 5dfebf13d52..08fa7c13bc3 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionArray.h @@ -28,7 +28,8 @@ private: public: AggregateFunctionArray(AggregateFunctionPtr nested_, const DataTypes & arguments) - : nested_func(nested_), num_arguments(arguments.size()) + : IAggregateFunctionHelper(arguments, {}) + , nested_func(nested_), num_arguments(arguments.size()) { for (const auto & type : arguments) if (!isArray(type)) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 53b42c42c9a..98604f76742 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -55,7 +55,8 @@ public: /// ctor for Decimals AggregateFunctionAvg(const IDataType & data_type) - : scale(getDecimalScale(data_type)) + : IAggregateFunctionDataHelper>({data_type}, {}) + , scale(getDecimalScale(data_type)) {} String getName() const override { return "avg"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp index 8c188bcbb8e..e92e1917bd5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.cpp @@ -21,7 +21,7 @@ AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, co + " is illegal, because it cannot be used in bitwise operations", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - AggregateFunctionPtr res(createWithUnsignedIntegerType(*argument_types[0])); + AggregateFunctionPtr res(createWithUnsignedIntegerType(*argument_types[0], argument_types[0])); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h index 6d33f010bd0..2788fdccd51 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -43,6 +43,9 @@ template class AggregateFunctionBitwise final : public IAggregateFunctionDataHelper> { public: + AggregateFunctionBitwise(const DataTypePtr & type) + : IAggregateFunctionDataHelper>({type}, {}) {} + String getName() const override { return Data::name(); } DataTypePtr getReturnType() const 
override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/dbms/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 40b13acbbaa..5966993dc65 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -111,6 +111,7 @@ public: } AggregateFunctionBoundingRatio(const DataTypes & arguments) + : IAggregateFunctionDataHelper(arguments, {}) { const auto x_arg = arguments.at(0).get(); const auto y_arg = arguments.at(0).get(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp b/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp index 1df424ecbf2..02dc796a4cf 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -9,12 +9,12 @@ namespace DB namespace { -AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, const DataTypes & /*argument_types*/, const Array & parameters) +AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, const DataTypes & argument_types, const Array & parameters) { assertNoParameters(name, parameters); /// 'count' accept any number of arguments and (in this case of non-Nullable types) simply ignore them. - return std::make_shared(); + return std::make_shared(argument_types); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionCount.h b/dbms/src/AggregateFunctions/AggregateFunctionCount.h index f9a1dcb45e2..82958a95fd2 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionCount.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionCount.h @@ -28,6 +28,8 @@ namespace ErrorCodes class AggregateFunctionCount final : public IAggregateFunctionDataHelper { public: + AggregateFunctionCount(const DataTypes & argument_types) : IAggregateFunctionDataHelper(argument_types, {}) {} + String getName() const override { return "count"; } DataTypePtr getReturnType() const override @@ -74,7 +76,8 @@ public: class AggregateFunctionCountNotNullUnary final : public IAggregateFunctionDataHelper { public: - AggregateFunctionCountNotNullUnary(const DataTypePtr & argument) + AggregateFunctionCountNotNullUnary(const DataTypePtr & argument, const Array & params) + : IAggregateFunctionDataHelper({argument}, params) { if (!argument->isNullable()) throw Exception("Logical error: not Nullable data type passed to AggregateFunctionCountNotNullUnary", ErrorCodes::LOGICAL_ERROR); @@ -120,7 +123,8 @@ public: class AggregateFunctionCountNotNullVariadic final : public IAggregateFunctionDataHelper { public: - AggregateFunctionCountNotNullVariadic(const DataTypes & arguments) + AggregateFunctionCountNotNullVariadic(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper(arguments, params) { number_of_arguments = arguments.size(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp index 2f9910c97de..7ea15e11b72 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp @@ -26,12 +26,12 @@ AggregateFunctionPtr createAggregateFunctionEntropy(const std::string & name, co if (num_args == 1) { /// Specialized implementation for single argument of numeric type. 
- if (auto res = createWithNumericBasedType(*argument_types[0], num_args)) + if (auto res = createWithNumericBasedType(*argument_types[0], argument_types)) return AggregateFunctionPtr(res); } /// Generic implementation for other types or for multiple arguments. - return std::make_shared>(num_args); + return std::make_shared>(argument_types); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h index 1adeefc6397..91ec6d4d5a6 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -97,7 +97,9 @@ private: size_t num_args; public: - AggregateFunctionEntropy(size_t num_args) : num_args(num_args) + AggregateFunctionEntropy(const DataTypes & argument_types) + : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types, {}) + , num_args(argument_types.size()) { } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 932d6615385..6aeaaef2bfa 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -86,17 +86,12 @@ AggregateFunctionPtr AggregateFunctionFactory::get( [](const auto & type) { return type->onlyNull(); })) nested_function = getImpl(name, nested_types, parameters, recursion_level); - auto res = combinator->transformAggregateFunction(nested_function, type_without_low_cardinality, parameters); - res->setArguments(type_without_low_cardinality, parameters); - return res; + return combinator->transformAggregateFunction(nested_function, argument_types, parameters); } auto res = getImpl(name, type_without_low_cardinality, parameters, recursion_level); if (!res) throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR); - - res->setArguments(type_without_low_cardinality, parameters); - return res; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionForEach.h b/dbms/src/AggregateFunctions/AggregateFunctionForEach.h index 519d1911a8a..39a52a7fa6e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionForEach.h @@ -97,7 +97,8 @@ private: public: AggregateFunctionForEach(AggregateFunctionPtr nested_, const DataTypes & arguments) - : nested_func(nested_), num_arguments(arguments.size()) + : IAggregateFunctionDataHelper(arguments, {}) + , nested_func(nested_), num_arguments(arguments.size()) { nested_size_of_data = nested_func->sizeOfData(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h index 26708c87520..c496e90844d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -48,12 +48,13 @@ class GroupArrayNumericImpl final : public IAggregateFunctionDataHelper, GroupArrayNumericImpl> { static constexpr bool limit_num_elems = Tlimit_num_elems::value; - DataTypePtr data_type; + DataTypePtr & data_type; UInt64 max_elems; public: explicit GroupArrayNumericImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits::max()) - : data_type(data_type_), max_elems(max_elems_) {} + : IAggregateFunctionDataHelper, GroupArrayNumericImpl>({data_type}, {}) + , data_type(argument_types[0]), max_elems(max_elems_) {} String getName() const override { return "groupArray"; } diff --git 
a/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.cpp index bc8fac86d6d..ea42c129dea 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.cpp @@ -13,6 +13,10 @@ namespace AggregateFunctionPtr createAggregateFunctionGroupArrayInsertAt(const std::string & name, const DataTypes & argument_types, const Array & parameters) { assertBinary(name, argument_types); + + if (argument_types.size() != 2) + throw Exception("Aggregate function groupArrayInsertAt requires two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + return std::make_shared(argument_types, parameters); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index 90b19266e4c..c7dab21a4cb 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -54,12 +54,14 @@ class AggregateFunctionGroupArrayInsertAtGeneric final : public IAggregateFunctionDataHelper { private: - DataTypePtr type; + DataTypePtr & type; Field default_value; UInt64 length_to_resize = 0; /// zero means - do not do resizing. public: AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper(arguments, params) + , type(argument_types[0]) { if (!params.empty()) { @@ -76,14 +78,9 @@ public: } } - if (arguments.size() != 2) - throw Exception("Aggregate function " + getName() + " requires two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isUnsignedInteger(arguments[1])) throw Exception("Second argument of aggregate function " + getName() + " must be integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - type = arguments.front(); - if (default_value.isNull()) default_value = type->getDefault(); else diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index a84ba2b28a2..f80a45afaa9 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -15,11 +15,15 @@ namespace /// Substitute return type for Date and DateTime class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray { +public: + AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type) : AggregateFunctionGroupUniqArray(argument_type) {} DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } }; class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray { +public: + AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type) : AggregateFunctionGroupUniqArray(argument_type) {} DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } }; @@ -27,8 +31,8 @@ class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUni static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type) { WhichDataType which(argument_type); - if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate; - else if (which.idx == TypeIndex::DateTime) return new AggregateFunctionGroupUniqArrayDateTime; + if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate(argument_type); + 
else if (which.idx == TypeIndex::DateTime) return new AggregateFunctionGroupUniqArrayDateTime(argument_type); else { /// Check that we can use plain version of AggreagteFunctionGroupUniqArrayGeneric diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index b638996f553..c0ef1fe0fa8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -44,6 +44,9 @@ private: using State = AggregateFunctionGroupUniqArrayData; public: + AggregateFunctionGroupUniqArray(const DataTypePtr & argument_type) + : IAggregateFunctionDataHelper, AggregateFunctionGroupUniqArray>({argument_type}, {}) {} + String getName() const override { return "groupUniqArray"; } DataTypePtr getReturnType() const override @@ -115,7 +118,7 @@ template class AggreagteFunctionGroupUniqArrayGeneric : public IAggregateFunctionDataHelper> { - DataTypePtr input_data_type; + DataTypePtr & input_data_type; using State = AggreagteFunctionGroupUniqArrayGenericData; @@ -125,7 +128,8 @@ class AggreagteFunctionGroupUniqArrayGeneric public: AggreagteFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type) - : input_data_type(input_data_type) {} + : IAggregateFunctionDataHelper>({input_data_type}, {}) + , input_data_type(argument_types[0]) {} String getName() const override { return "groupUniqArray"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.cpp b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.cpp index 05c4fe86320..384298b16a8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.cpp @@ -39,7 +39,7 @@ AggregateFunctionPtr createAggregateFunctionHistogram(const std::string & name, throw Exception("Bin count should be positive", ErrorCodes::BAD_ARGUMENTS); assertUnary(name, arguments); - AggregateFunctionPtr res(createWithNumericType(*arguments[0], bins_count)); + AggregateFunctionPtr res(createWithNumericType(*arguments[0], bins_count, arguments, params)); if (!res) throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h index 3d03821cc65..60385f4788a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -304,8 +304,9 @@ private: const UInt32 max_bins; public: - AggregateFunctionHistogram(UInt32 max_bins) - : max_bins(max_bins) + AggregateFunctionHistogram(const DataTypes & arguments, const Array & params, UInt32 max_bins) + : IAggregateFunctionDataHelper>(arguments, params) + , max_bins(max_bins) { } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionIf.h b/dbms/src/AggregateFunctions/AggregateFunctionIf.h index 594193eac87..8daf9505ae6 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionIf.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionIf.h @@ -28,7 +28,8 @@ private: public: AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types) - : nested_func(nested), num_arguments(types.size()) + : IAggregateFunctionHelper(types, nested->getParameters()) + , nested_func(nested), num_arguments(types.size()) { if (num_arguments == 0) throw Exception("Aggregate function " + getName() + " require at least one argument", 
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 9b81ce01f30..dbb727b7d9a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -59,7 +59,7 @@ private: public: AggregateFunctionIntersectionsMax(AggregateFunctionIntersectionsKind kind_, const DataTypes & arguments) - : kind(kind_) + : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}), kind(kind_) { if (!isNumber(arguments[0])) throw Exception{getName() + ": first argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMerge.cpp b/dbms/src/AggregateFunctions/AggregateFunctionMerge.cpp index 256c7bc9a84..f9c2eb8c9dd 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMerge.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionMerge.cpp @@ -47,7 +47,7 @@ public: + ", because it corresponds to different aggregate function: " + function->getFunctionName() + " instead of " + nested_function->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(nested_function, *argument); + return std::make_shared(nested_function, argument); } }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMerge.h b/dbms/src/AggregateFunctions/AggregateFunctionMerge.h index 2d92db98e17..c94d4d3cf3c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMerge.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMerge.h @@ -22,13 +22,14 @@ private: AggregateFunctionPtr nested_func; public: - AggregateFunctionMerge(const AggregateFunctionPtr & nested_, const IDataType & argument) - : nested_func(nested_) + AggregateFunctionMerge(const AggregateFunctionPtr & nested_, const DataTypePtr & argument) + : IAggregateFunctionHelper({argument}, nested_->getParameters()) + , nested_func(nested_) { - const DataTypeAggregateFunction * data_type = typeid_cast(&argument); + const DataTypeAggregateFunction * data_type = typeid_cast(argument.get()); if (!data_type || data_type->getFunctionName() != nested_func->getName()) - throw Exception("Illegal type " + argument.getName() + " of argument for aggregate function " + getName(), + throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 51d1e8d1dd7..426ee8ee479 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -676,10 +676,12 @@ template class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> { private: - DataTypePtr type; + DataTypePtr & type; public: - AggregateFunctionsSingleValue(const DataTypePtr & type) : type(type) + AggregateFunctionsSingleValue(const DataTypePtr & type) + : IAggregateFunctionDataHelper>({type}, {}) + , type(argument_types[0]) { if (StringRef(Data::name()) == StringRef("min") || StringRef(Data::name()) == StringRef("max")) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionNothing.h b/dbms/src/AggregateFunctions/AggregateFunctionNothing.h index 3a98807bb4a..aa54d95f158 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionNothing.h +++ 
b/dbms/src/AggregateFunctions/AggregateFunctionNothing.h @@ -15,6 +15,9 @@ namespace DB class AggregateFunctionNothing final : public IAggregateFunctionHelper { public: + AggregateFunctionNothing(const DataTypes & arguments, const Array & params) + : IAggregateFunctionHelper(arguments, params) {} + String getName() const override { return "nothing"; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp b/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp index 6ce7d94d970..7011ebbde09 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -30,7 +30,7 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override { bool has_nullable_types = false; bool has_null_types = false; @@ -55,29 +55,29 @@ public: if (nested_function && nested_function->getName() == "count") { if (arguments.size() == 1) - return std::make_shared(arguments[0]); + return std::make_shared(arguments[0], params); else - return std::make_shared(arguments); + return std::make_shared(arguments, params); } if (has_null_types) - return std::make_shared(); + return std::make_shared(arguments, params); bool return_type_is_nullable = nested_function->getReturnType()->canBeInsideNullable(); if (arguments.size() == 1) { if (return_type_is_nullable) - return std::make_shared>(nested_function); + return std::make_shared>(nested_function, arguments, params); else - return std::make_shared>(nested_function); + return std::make_shared>(nested_function, arguments, params); } else { if (return_type_is_nullable) - return std::make_shared>(nested_function, arguments); + return std::make_shared>(nested_function, arguments, params); else - return std::make_shared>(nested_function, arguments); + return std::make_shared>(nested_function, arguments, params); } } }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionNull.h b/dbms/src/AggregateFunctions/AggregateFunctionNull.h index c8676230500..ab4b5b27844 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionNull.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionNull.h @@ -68,8 +68,8 @@ protected: } public: - AggregateFunctionNullBase(AggregateFunctionPtr nested_function_) - : nested_function{nested_function_} + AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : IAggregateFunctionHelper(arguments, params), nested_function{nested_function_} { if (result_is_nullable) prefix_size = nested_function->alignOfData(); @@ -187,8 +187,8 @@ template class AggregateFunctionNullUnary final : public AggregateFunctionNullBase> { public: - AggregateFunctionNullUnary(AggregateFunctionPtr nested_function_) - : AggregateFunctionNullBase>(std::move(nested_function_)) + AggregateFunctionNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) + : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params) { } @@ -209,8 +209,8 @@ template class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase> { public: - AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments) - : AggregateFunctionNullBase>(std::move(nested_function_)), + AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & 
arguments, const Array & params) + : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size()) { if (number_of_arguments == 1) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h index cee2b6fe0c0..a87f520d395 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -73,11 +73,12 @@ private: /// Used when there are single level to get. Float64 level = 0.5; - DataTypePtr argument_type; + DataTypePtr & argument_type; public: AggregateFunctionQuantile(const DataTypePtr & argument_type, const Array & params) - : levels(params, returns_many), level(levels.levels[0]), argument_type(argument_type) + : IAggregateFunctionDataHelper>({argument_type}, params) + , levels(params, returns_many), level(levels.levels[0]), argument_type(argument_types[0]) { if (!returns_many && levels.size() > 1) throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionRetention.h b/dbms/src/AggregateFunctions/AggregateFunctionRetention.h index 688f7f1404c..525a4d848d2 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionRetention.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionRetention.h @@ -76,6 +76,7 @@ public: } AggregateFunctionRetention(const DataTypes & arguments) + : IAggregateFunctionDataHelper(arguments, {}) { for (const auto i : ext::range(0, arguments.size())) { diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp index 0b7a4b6b357..be139d9e633 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp @@ -19,7 +19,7 @@ AggregateFunctionPtr createAggregateFunctionSequenceCount(const std::string & na ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; String pattern = params.front().safeGet(); - return std::make_shared(argument_types, pattern); + return std::make_shared(argument_types, params, pattern); } AggregateFunctionPtr createAggregateFunctionSequenceMatch(const std::string & name, const DataTypes & argument_types, const Array & params) @@ -29,7 +29,7 @@ AggregateFunctionPtr createAggregateFunctionSequenceMatch(const std::string & na ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; String pattern = params.front().safeGet(); - return std::make_shared(argument_types, pattern); + return std::make_shared(argument_types, params, pattern); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 86627a453c2..5c443c72b63 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -139,8 +139,9 @@ template class AggregateFunctionSequenceBase : public IAggregateFunctionDataHelper { public: - AggregateFunctionSequenceBase(const DataTypes & arguments, const String & pattern) - : pattern(pattern) + AggregateFunctionSequenceBase(const DataTypes & arguments, const Array & params, const String & pattern) + : IAggregateFunctionDataHelper(arguments, params) + , pattern(pattern) { arg_count = arguments.size(); @@ -578,6 +579,9 @@ private: class AggregateFunctionSequenceMatch final : public AggregateFunctionSequenceBase { public: + 
AggregateFunctionSequenceMatch(const DataTypes & arguments, const Array & params, const String & pattern) + : AggregateFunctionSequenceBase(arguments, params, pattern) {} + using AggregateFunctionSequenceBase::AggregateFunctionSequenceBase; String getName() const override { return "sequenceMatch"; } @@ -603,6 +607,9 @@ public: class AggregateFunctionSequenceCount final : public AggregateFunctionSequenceBase { public: + AggregateFunctionSequenceCount(const DataTypes & arguments, const Array & params, const String & pattern) + : AggregateFunctionSequenceBase(arguments, params, pattern) {} + using AggregateFunctionSequenceBase::AggregateFunctionSequenceBase; String getName() const override { return "sequenceCount"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionState.h b/dbms/src/AggregateFunctions/AggregateFunctionState.h index 30755ce3896..2d8e5c6a537 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionState.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionState.h @@ -24,7 +24,8 @@ private: public: AggregateFunctionState(AggregateFunctionPtr nested, const DataTypes & arguments, const Array & params) - : nested_func(nested), arguments(arguments), params(params) {} + : IAggregateFunctionHelper(arguments, params) + , nested_func(nested), arguments(arguments), params(params) {} String getName() const override { diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.cpp index ae73013d29d..1530ad25cf3 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -21,7 +21,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsUnary(const std::string & assertNoParameters(name, parameters); assertUnary(name, argument_types); - AggregateFunctionPtr res(createWithNumericType(*argument_types[0])); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types[0])); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -35,7 +35,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1])); + AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h index 82d34fc2954..d1112ec0831 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -111,6 +111,9 @@ class AggregateFunctionVariance final : public IAggregateFunctionDataHelper, AggregateFunctionVariance> { public: + AggregateFunctionVariance(const DataTypePtr & arg) + : IAggregateFunctionDataHelper, AggregateFunctionVariance>({arg}, {}) {} + String getName() const override { return Op::name; } DataTypePtr getReturnType() const override @@ -361,6 +364,10 @@ class AggregateFunctionCovariance final AggregateFunctionCovariance> { public: + AggregateFunctionCovariance(const DataTypes & args) : 
IAggregateFunctionDataHelper< + CovarianceData, + AggregateFunctionCovariance>(args, {}) {} + String getName() const override { return Op::name; } DataTypePtr getReturnType() const override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index 0580a5131a2..4ab6a4d51ed 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -288,12 +288,14 @@ public: using ResultType = typename StatFunc::ResultType; using ColVecResult = ColumnVector; - AggregateFunctionVarianceSimple() - : src_scale(0) + AggregateFunctionVarianceSimple(const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) + , src_scale(0) {} - AggregateFunctionVarianceSimple(const IDataType & data_type) - : src_scale(getDecimalScale(data_type)) + AggregateFunctionVarianceSimple(const IDataType & data_type, const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) + , src_scale(getDecimalScale(data_type)) {} String getName() const override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp index f21c60eeae6..5e060d7b7df 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.cpp @@ -50,9 +50,9 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const AggregateFunctionPtr res; DataTypePtr data_type = argument_types[0]; if (isDecimal(data_type)) - res.reset(createWithDecimalType(*data_type, *data_type)); + res.reset(createWithDecimalType(*data_type, *data_type, argument_types)); else - res.reset(createWithNumericType(*data_type)); + res.reset(createWithNumericType(*data_type, argument_types)); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.h b/dbms/src/AggregateFunctions/AggregateFunctionSum.h index 5bd2d10917a..1860088cd93 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.h @@ -102,12 +102,14 @@ public: String getName() const override { return "sum"; } - AggregateFunctionSum() - : scale(0) + AggregateFunctionSum(const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) + , scale(0) {} - AggregateFunctionSum(const IDataType & data_type) - : scale(getDecimalScale(data_type)) + AggregateFunctionSum(const IDataType & data_type, const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) + , scale(getDecimalScale(data_type)) {} DataTypePtr getReturnType() const override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 75cd62c00f1..5a10ae62324 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -80,7 +80,7 @@ AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, con auto [keys_type, values_types] = parseArguments(name, arguments); - AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types)); + AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types, arguments)); if (!res) res.reset(createWithDecimalType(*keys_type, keys_type, 
values_types)); if (!res) @@ -103,7 +103,7 @@ AggregateFunctionPtr createAggregateFunctionSumMapFiltered(const std::string & n auto [keys_type, values_types] = parseArguments(name, arguments); - AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types, keys_to_keep)); + AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types, keys_to_keep, arguments, params)); if (!res) res.reset(createWithDecimalType(*keys_type, keys_type, values_types, keys_to_keep)); if (!res) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index c239b74630e..ef6cae9babc 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -61,8 +61,11 @@ private: DataTypes values_types; public: - AggregateFunctionSumMapBase(const DataTypePtr & keys_type, const DataTypes & values_types) - : keys_type(keys_type), values_types(values_types) {} + AggregateFunctionSumMapBase( + const DataTypePtr & keys_type, const DataTypes & values_types, + const DataTypes & argument_types, const Array & params) + : IAggregateFunctionDataHelper>, Derived>(argument_types, params) + , keys_type(keys_type), values_types(values_types) {} String getName() const override { return "sumMap"; } @@ -271,8 +274,8 @@ private: using Base = AggregateFunctionSumMapBase; public: - AggregateFunctionSumMap(const DataTypePtr & keys_type, DataTypes & values_types) - : Base{keys_type, values_types} + AggregateFunctionSumMap(const DataTypePtr & keys_type, DataTypes & values_types, const DataTypes & argument_types) + : Base{keys_type, values_types, argument_types, {}} {} String getName() const override { return "sumMap"; } @@ -291,8 +294,10 @@ private: std::unordered_set keys_to_keep; public: - AggregateFunctionSumMapFiltered(const DataTypePtr & keys_type, const DataTypes & values_types, const Array & keys_to_keep_) - : Base{keys_type, values_types} + AggregateFunctionSumMapFiltered( + const DataTypePtr & keys_type, const DataTypes & values_types, const Array & keys_to_keep_, + const DataTypes & argument_types, const Array & params) + : Base{keys_type, values_types, argument_types, params} { keys_to_keep.reserve(keys_to_keep_.size()); for (const Field & f : keys_to_keep_) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp b/dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp index 168dba4ebd5..04e74c17434 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -39,19 +39,19 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK -static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold) +static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, const Array & params) { WhichDataType which(argument_type); if (which.idx == TypeIndex::Date) - return new AggregateFunctionTopKDate(threshold); + return new AggregateFunctionTopKDate(threshold, {argument_type}, params); if (which.idx == TypeIndex::DateTime) - return new AggregateFunctionTopKDateTime(threshold); + return new AggregateFunctionTopKDateTime(threshold, {argument_type}, params); /// Check that we can use plain version of AggregateFunctionTopKGeneric if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) - return new AggregateFunctionTopKGeneric(threshold, argument_type); + return new 
AggregateFunctionTopKGeneric(threshold, argument_type, params); else - return new AggregateFunctionTopKGeneric(threshold, argument_type); + return new AggregateFunctionTopKGeneric(threshold, argument_type, params); } @@ -90,10 +90,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const threshold = k; } - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold)); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold, argument_types, params)); if (!res) - res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold)); + res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold, params)); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 09897f5ccd2..846a3e2b2a1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -48,8 +48,9 @@ protected: UInt64 reserved; public: - AggregateFunctionTopK(UInt64 threshold) - : threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {} + AggregateFunctionTopK(UInt64 threshold, const DataTypes & argument_types, const Array & params) + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types, params) + , threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {} String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } @@ -136,13 +137,15 @@ private: UInt64 threshold; UInt64 reserved; - DataTypePtr input_data_type; + DataTypePtr & input_data_type; static void deserializeAndInsert(StringRef str, IColumn & data_to); public: - AggregateFunctionTopKGeneric(UInt64 threshold, const DataTypePtr & input_data_type) - : threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(input_data_type) {} + AggregateFunctionTopKGeneric( + UInt64 threshold, const DataTypePtr & input_data_type, const Array & params) + : IAggregateFunctionDataHelper>({input_data_type}, params) + , threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(argument_types[0]) {} String getName() const override { return is_weighted ? 
"topKWeighted" : "topK"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp index 6b63a719b8f..eaf021d8735 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -43,19 +43,19 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType(*argument_types[0])); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared>(); + return std::make_shared>(argument_types); else if (which.isDateTime()) - return std::make_shared>(); + return std::make_shared>(argument_types); else if (which.isStringOrFixedString()) - return std::make_shared>(); + return std::make_shared>(argument_types); else if (which.isUUID()) - return std::make_shared>(); + return std::make_shared>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) @@ -89,19 +89,19 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType(*argument_types[0])); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared>>(); + return std::make_shared>>(argument_types); else if (which.isDateTime()) - return std::make_shared>>(); + return std::make_shared>>(argument_types); else if (which.isStringOrFixedString()) return std::make_shared>>(); else if (which.isUUID()) - return std::make_shared>>(); + return std::make_shared>>(argument_types); else if (which.isTuple()) { if (use_exact_hash_function) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index fea79a920a9..56a855aabb9 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -209,6 +209,9 @@ template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { public: + AggregateFunctionUniq(const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) {} + String getName() const override { return Data::getName(); } DataTypePtr getReturnType() const override @@ -257,6 +260,7 @@ private: public: AggregateFunctionUniqVariadic(const DataTypes & arguments) + : IAggregateFunctionDataHelper>(arguments) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 90b84d3b927..38982b8130e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -28,7 +28,7 @@ namespace }; template - AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types) + AggregateFunctionPtr createAggregateFunctionWithK(const DataTypes & argument_types, const Array & params) { /// We use exact hash function if the arguments are not contiguous in memory, because only exact hash function has support for this case. 
bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types); @@ -37,33 +37,33 @@ namespace { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType::template AggregateFunction>(*argument_types[0])); + AggregateFunctionPtr res(createWithNumericType::template AggregateFunction>(*argument_types[0], argument_types, params)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared::template AggregateFunction>(); + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isDateTime()) - return std::make_shared::template AggregateFunction>(); + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isStringOrFixedString()) - return std::make_shared::template AggregateFunction>(); + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isUUID()) - return std::make_shared::template AggregateFunction>(); + return std::make_shared::template AggregateFunction>(argument_types, params); else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared::template AggregateFunctionVariadic>(argument_types); + return std::make_shared::template AggregateFunctionVariadic>(argument_types, params); else - return std::make_shared::template AggregateFunctionVariadic>(argument_types); + return std::make_shared::template AggregateFunctionVariadic>(argument_types, params); } } /// "Variadic" method also works as a fallback generic case for a single argument. if (use_exact_hash_function) - return std::make_shared::template AggregateFunctionVariadic>(argument_types); + return std::make_shared::template AggregateFunctionVariadic>(argument_types, params); else - return std::make_shared::template AggregateFunctionVariadic>(argument_types); + return std::make_shared::template AggregateFunctionVariadic>(argument_types, params); } AggregateFunctionPtr createAggregateFunctionUniqCombined( @@ -95,23 +95,23 @@ namespace switch (precision) { case 12: - return createAggregateFunctionWithK<12>(argument_types); + return createAggregateFunctionWithK<12>(argument_types, params); case 13: - return createAggregateFunctionWithK<13>(argument_types); + return createAggregateFunctionWithK<13>(argument_types, params); case 14: - return createAggregateFunctionWithK<14>(argument_types); + return createAggregateFunctionWithK<14>(argument_types, params); case 15: - return createAggregateFunctionWithK<15>(argument_types); + return createAggregateFunctionWithK<15>(argument_types, params); case 16: - return createAggregateFunctionWithK<16>(argument_types); + return createAggregateFunctionWithK<16>(argument_types, params); case 17: - return createAggregateFunctionWithK<17>(argument_types); + return createAggregateFunctionWithK<17>(argument_types, params); case 18: - return createAggregateFunctionWithK<18>(argument_types); + return createAggregateFunctionWithK<18>(argument_types, params); case 19: - return createAggregateFunctionWithK<19>(argument_types); + return createAggregateFunctionWithK<19>(argument_types, params); case 20: - return createAggregateFunctionWithK<20>(argument_types); + return createAggregateFunctionWithK<20>(argument_types, params); } __builtin_unreachable(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 001f4e7f289..3b7aee95186 100644 --- 
a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -114,6 +114,9 @@ class AggregateFunctionUniqCombined final : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> { public: + AggregateFunctionUniqCombined(const DataTypes & argument_types, const Array & params) + : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types, params) {} + String getName() const override { return "uniqCombined"; @@ -176,7 +179,9 @@ private: size_t num_args = 0; public: - explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments) + explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper, + AggregateFunctionUniqCombinedVariadic>(arguments, params) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp index b9cdcaa4eae..ba4f337839e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp @@ -52,33 +52,33 @@ AggregateFunctionPtr createAggregateFunctionUniqUpTo(const std::string & name, c { const IDataType & argument_type = *argument_types[0]; - AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold)); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], threshold, argument_types, params)); WhichDataType which(argument_type); if (res) return res; else if (which.isDate()) - return std::make_shared>(threshold); + return std::make_shared>(threshold, argument_types, params); else if (which.isDateTime()) - return std::make_shared>(threshold); + return std::make_shared>(threshold, argument_types, params); else if (which.isStringOrFixedString()) - return std::make_shared>(threshold); + return std::make_shared>(threshold, argument_types, params); else if (which.isUUID()) - return std::make_shared>(threshold); + return std::make_shared>(threshold, argument_types, params); else if (which.isTuple()) { if (use_exact_hash_function) - return std::make_shared>(argument_types, threshold); + return std::make_shared>(argument_types, params, threshold); else - return std::make_shared>(argument_types, threshold); + return std::make_shared>(argument_types, params, threshold); } } /// "Variadic" method also works as a fallback generic case for single argument. 
if (use_exact_hash_function) - return std::make_shared>(argument_types, threshold); + return std::make_shared>(argument_types, params, threshold); else - return std::make_shared>(argument_types, threshold); + return std::make_shared>(argument_types, params, threshold); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 6b6a645024a..477a729894d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -136,8 +136,9 @@ private: UInt8 threshold; public: - AggregateFunctionUniqUpTo(UInt8 threshold) - : threshold(threshold) + AggregateFunctionUniqUpTo(UInt8 threshold, const DataTypes & argument_types, const Array & params) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types, params) + , threshold(threshold) { } @@ -195,8 +196,9 @@ private: UInt8 threshold; public: - AggregateFunctionUniqUpToVariadic(const DataTypes & arguments, UInt8 threshold) - : threshold(threshold) + AggregateFunctionUniqUpToVariadic(const DataTypes & arguments, const Array & params, UInt8 threshold) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpToVariadic>(arguments, params) + , threshold(threshold) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index 317637b1b69..556f9bb1ae1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -189,6 +189,7 @@ public: } AggregateFunctionWindowFunnel(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper(arguments, params) { const auto time_arg = arguments.front().get(); if (!WhichDataType(time_arg).isDateTime() && !WhichDataType(time_arg).isUInt32()) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionsStatisticsSimple.cpp b/dbms/src/AggregateFunctions/AggregateFunctionsStatisticsSimple.cpp index 4159403afc7..1fafa6e00c9 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionsStatisticsSimple.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionsStatisticsSimple.cpp @@ -24,9 +24,9 @@ AggregateFunctionPtr createAggregateFunctionStatisticsUnary(const std::string & AggregateFunctionPtr res; DataTypePtr data_type = argument_types[0]; if (isDecimal(data_type)) - res.reset(createWithDecimalType(*data_type, *data_type)); + res.reset(createWithDecimalType(*data_type, *data_type, argument_types)); else - res.reset(createWithNumericType(*data_type)); + res.reset(createWithNumericType(*data_type, argument_types)); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, @@ -40,7 +40,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1])); + AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/IAggregateFunction.h 
b/dbms/src/AggregateFunctions/IAggregateFunction.h index f5def066058..17620f7493d 100644 --- a/dbms/src/AggregateFunctions/IAggregateFunction.h +++ b/dbms/src/AggregateFunctions/IAggregateFunction.h @@ -37,6 +37,9 @@ using ConstAggregateDataPtr = const char *; class IAggregateFunction { public: + IAggregateFunction(const DataTypes & argument_types_, const Array & parameters_) + : argument_types(argument_types_), parameters(parameters_) {} + /// Get main function name. virtual String getName() const = 0; @@ -112,17 +115,9 @@ public: const DataTypes & getArgumentTypes() const { return argument_types; } const Array & getParameters() const { return parameters; } -private: +protected: DataTypes argument_types; Array parameters; - - friend class AggregateFunctionFactory; - - void setArguments(DataTypes argument_types_, Array parameters_) - { - argument_types = std::move(argument_types_); - parameters = std::move(parameters_); - } }; @@ -137,6 +132,8 @@ private: } public: + IAggregateFunctionHelper(const DataTypes & argument_types_, const Array & parameters_) + : IAggregateFunction(argument_types_, parameters_) {} AddFunc getAddressOfAddFunction() const override { return &addFree; } }; @@ -152,6 +149,10 @@ protected: static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast(place); } public: + + IAggregateFunctionDataHelper(const DataTypes & argument_types_, const Array & parameters_) + : IAggregateFunctionHelper(argument_types_, parameters_) {} + void create(AggregateDataPtr place) const override { new (place) Data; From bbfd6c502c037b09902441a9631f29a736f38c10 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Feb 2019 22:33:04 +0300 Subject: [PATCH 18/79] test for issue-3998 --- dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference | 2 ++ dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql | 3 +++ 2 files changed, 5 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference index 204377075a9..c79c5b1fbc7 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference @@ -3,3 +3,5 @@ 11 11 11 12 12 11 +0 +1 diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql index b1cfa3780a2..af62eef03ca 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql @@ -3,3 +3,6 @@ select s.a + 1 as a, s.a as b from (select 10 as a) s; select s.a + 1 as a, s.a + 1 as b from (select 10 as a) s; select s.a + 1 as b, s.a + 2 as a from (select 10 as a) s; select s.a + 2 as b, s.a + 1 as a from (select 10 as a) s; + +SELECT 0 as t FROM (SELECT 1 as t) as inn WHERE inn.t = 1; +SELECT sum(value) as value FROM (SELECT 1 as value) as data WHERE data.value > 0; From a493f9ee7d2df969ad8f4fc0d78c0046548225eb Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 11 Feb 2019 22:39:11 +0300 Subject: [PATCH 19/79] add test for issue-3290 --- .../queries/0_stateless/00818_alias_bug_4110.reference | 1 + dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference index c79c5b1fbc7..5186cb8eeff 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference +++ 
b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.reference @@ -5,3 +5,4 @@ 12 11 0 1 +123 456 diff --git a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql index af62eef03ca..7480f137a65 100644 --- a/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql +++ b/dbms/tests/queries/0_stateless/00818_alias_bug_4110.sql @@ -6,3 +6,10 @@ select s.a + 2 as b, s.a + 1 as a from (select 10 as a) s; SELECT 0 as t FROM (SELECT 1 as t) as inn WHERE inn.t = 1; SELECT sum(value) as value FROM (SELECT 1 as value) as data WHERE data.value > 0; + +USE test; +DROP TABLE IF EXISTS test; +CREATE TABLE test (field String, not_field String) ENGINE = Memory; +INSERT INTO test (field, not_field) VALUES ('123', '456') +SELECT test.field AS other_field, test.not_field AS field FROM test; +DROP TABLE test; From b01d73542134ee6749f8dcc94ce0136a1905b6f3 Mon Sep 17 00:00:00 2001 From: Mikhail Fandyushin Date: Mon, 11 Feb 2019 23:42:46 +0300 Subject: [PATCH 20/79] hide brotli from inerface --- dbms/CMakeLists.txt | 3 --- dbms/src/IO/BrotliReadBuffer.cpp | 33 +++++++++++++++++++++++++------- dbms/src/IO/BrotliReadBuffer.h | 5 ++--- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index a86de61827b..ec69a4389a7 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -89,9 +89,6 @@ set(dbms_sources) include(../cmake/dbms_glob_sources.cmake) -# temp ugly hack -include_directories(${BROTLI_INCLUDE_DIR}) - add_headers_and_sources(clickhouse_common_io src/Common) add_headers_and_sources(clickhouse_common_io src/Common/HashTable) add_headers_and_sources(clickhouse_common_io src/IO) diff --git a/dbms/src/IO/BrotliReadBuffer.cpp b/dbms/src/IO/BrotliReadBuffer.cpp index 70bf0a56dd8..9fc033c41cc 100644 --- a/dbms/src/IO/BrotliReadBuffer.cpp +++ b/dbms/src/IO/BrotliReadBuffer.cpp @@ -1,12 +1,32 @@ #include "BrotliReadBuffer.h" +#include namespace DB { + +class BrotliReadBuffer::BrotliStateWrapper +{ +public: + BrotliStateWrapper() + : state(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr)) + , result(BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) + { + } + + ~BrotliStateWrapper() + { + BrotliDecoderDestroyInstance(state); + } + +public: + BrotliDecoderState * state; + BrotliDecoderResult result; +}; + BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment) : BufferWithOwnMemory(buf_size, existing_memory, alignment) , in(in_) - , bstate(BrotliDecoderCreateInstance(nullptr, nullptr, nullptr)) - , bresult(BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) + , brotli(new BrotliStateWrapper()) , in_available(0) , in_data(nullptr) , out_capacity(0) @@ -17,7 +37,6 @@ BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *exist BrotliReadBuffer::~BrotliReadBuffer() { - BrotliDecoderDestroyInstance(bstate); } bool BrotliReadBuffer::nextImpl() @@ -32,7 +51,7 @@ bool BrotliReadBuffer::nextImpl() in_data = reinterpret_cast(in.position()); } - if (bresult == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof())) + if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof())) { throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -40,12 +59,12 @@ bool BrotliReadBuffer::nextImpl() out_capacity = internal_buffer.size(); out_data = reinterpret_cast(internal_buffer.begin()); - bresult = BrotliDecoderDecompressStream(bstate, &in_available, &in_data, &out_capacity, &out_data, 
nullptr); + brotli->result = BrotliDecoderDecompressStream(brotli->state, &in_available, &in_data, &out_capacity, &out_data, nullptr); in.position() = in.buffer().end() - in_available; working_buffer.resize(internal_buffer.size() - out_capacity); - if (bresult == BROTLI_DECODER_RESULT_SUCCESS) + if (brotli->result == BROTLI_DECODER_RESULT_SUCCESS) { if (in.eof()) { @@ -58,7 +77,7 @@ bool BrotliReadBuffer::nextImpl() } } - if (bresult == BROTLI_DECODER_RESULT_ERROR) + if (brotli->result == BROTLI_DECODER_RESULT_ERROR) { throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA); } diff --git a/dbms/src/IO/BrotliReadBuffer.h b/dbms/src/IO/BrotliReadBuffer.h index 0ba910a69ea..e0e9bc6160e 100644 --- a/dbms/src/IO/BrotliReadBuffer.h +++ b/dbms/src/IO/BrotliReadBuffer.h @@ -3,7 +3,6 @@ #include #include -#include namespace DB { @@ -24,8 +23,8 @@ private: ReadBuffer ∈ - BrotliDecoderState * bstate; - BrotliDecoderResult bresult; + class BrotliStateWrapper; + std::unique_ptr brotli; size_t in_available; const uint8_t * in_data; From 7a0ea7776e10ff4c1f842fe268eff84d9e223cbc Mon Sep 17 00:00:00 2001 From: Mikhail Fandyushin Date: Tue, 12 Feb 2019 00:04:47 +0300 Subject: [PATCH 21/79] install brotli for tests --- docker/test/stateless/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 9faf98bf066..d2d1821ed7c 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -19,7 +19,8 @@ RUN apt-get update -y \ openssl \ netcat-openbsd \ telnet \ - moreutils + moreutils \ + brotli ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone From 428f7ce27ba808998599b7b3b3b506552e13cd9c Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 12 Feb 2019 00:43:29 +0300 Subject: [PATCH 22/79] Fix split link of dbms/programs/odbc-bridge --- dbms/programs/odbc-bridge/CMakeLists.txt | 13 +++---------- dbms/programs/odbc-bridge/MainHandler.cpp | 2 +- .../odbc-bridge}/ODBCBlockInputStream.cpp | 0 .../odbc-bridge}/ODBCBlockInputStream.h | 2 +- 4 files changed, 5 insertions(+), 12 deletions(-) rename dbms/{src/Dictionaries => programs/odbc-bridge}/ODBCBlockInputStream.cpp (100%) rename dbms/{src/Dictionaries => programs/odbc-bridge}/ODBCBlockInputStream.h (94%) diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index 12062b5a939..3b06e0bc395 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -1,13 +1,6 @@ -add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} - PingHandler.cpp - MainHandler.cpp - ColumnInfoHandler.cpp - IdentifierQuoteHandler.cpp - HandlerFactory.cpp - ODBCBridge.cpp - getIdentifierQuote.cpp - validateODBCConnectionString.cpp -) +add_headers_and_sources(clickhouse_odbc_bridge .) 
+ +add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} ${clickhouse_odbc_bridge_sources}) target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) diff --git a/dbms/programs/odbc-bridge/MainHandler.cpp b/dbms/programs/odbc-bridge/MainHandler.cpp index 8ffedc5a25a..2aebdda3b03 100644 --- a/dbms/programs/odbc-bridge/MainHandler.cpp +++ b/dbms/programs/odbc-bridge/MainHandler.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include "ODBCBlockInputStream.h" #include #include #include diff --git a/dbms/src/Dictionaries/ODBCBlockInputStream.cpp b/dbms/programs/odbc-bridge/ODBCBlockInputStream.cpp similarity index 100% rename from dbms/src/Dictionaries/ODBCBlockInputStream.cpp rename to dbms/programs/odbc-bridge/ODBCBlockInputStream.cpp diff --git a/dbms/src/Dictionaries/ODBCBlockInputStream.h b/dbms/programs/odbc-bridge/ODBCBlockInputStream.h similarity index 94% rename from dbms/src/Dictionaries/ODBCBlockInputStream.h rename to dbms/programs/odbc-bridge/ODBCBlockInputStream.h index 46bfed01a51..e22c245fa47 100644 --- a/dbms/src/Dictionaries/ODBCBlockInputStream.h +++ b/dbms/programs/odbc-bridge/ODBCBlockInputStream.h @@ -6,7 +6,7 @@ #include #include #include -#include "ExternalResultDescription.h" +#include namespace DB From ec6a5590fdd99ea24fc42ba41208864bab9ffa1e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 12:31:20 +0300 Subject: [PATCH 23/79] Fix build. --- dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp | 4 ++-- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 9 +++++---- .../src/AggregateFunctions/AggregateFunctionGroupArray.h | 9 +++++---- .../AggregateFunctionGroupUniqArray.cpp | 2 +- .../AggregateFunctions/AggregateFunctionGroupUniqArray.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionHistogram.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionQuantile.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp | 4 ++-- dbms/src/AggregateFunctions/AggregateFunctionTopK.h | 2 +- dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp | 2 +- dbms/src/AggregateFunctions/AggregateFunctionUniq.h | 2 +- 13 files changed, 23 insertions(+), 21 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 42649be78fd..9f5d5b69fbd 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -37,7 +37,7 @@ private: public: AggregateFunctionArgMinMax(const DataTypePtr & type_res, const DataTypePtr & type_val) : IAggregateFunctionDataHelper>({type_res, type_val}, {}), - type_res(argument_types[0]), type_val(argument_types[1]) + type_res(this->argument_types[0]), type_val(this->argument_types[1]) { if (!type_val->isComparable()) throw Exception("Illegal type " + type_val->getName() + " of second argument of aggregate function " + getName() diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp b/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp index 565f1f0c335..1886637629f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.cpp @@ -27,9 +27,9 @@ AggregateFunctionPtr createAggregateFunctionAvg(const std::string & name, const 
AggregateFunctionPtr res; DataTypePtr data_type = argument_types[0]; if (isDecimal(data_type)) - res.reset(createWithDecimalType(*data_type, *data_type)); + res.reset(createWithDecimalType(*data_type, *data_type, argument_types)); else - res.reset(createWithNumericType(*data_type)); + res.reset(createWithNumericType(*data_type, argument_types)); if (!res) throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index 98604f76742..d34420efe28 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -49,13 +49,14 @@ public: using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; /// ctor for native types - AggregateFunctionAvg() - : scale(0) + AggregateFunctionAvg(const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) + , scale(0) {} /// ctor for Decimals - AggregateFunctionAvg(const IDataType & data_type) - : IAggregateFunctionDataHelper>({data_type}, {}) + AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types) + : IAggregateFunctionDataHelper>(argument_types, {}) , scale(getDecimalScale(data_type)) {} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h index c496e90844d..d732d65ecf8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -53,8 +53,8 @@ class GroupArrayNumericImpl final public: explicit GroupArrayNumericImpl(const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits::max()) - : IAggregateFunctionDataHelper, GroupArrayNumericImpl>({data_type}, {}) - , data_type(argument_types[0]), max_elems(max_elems_) {} + : IAggregateFunctionDataHelper, GroupArrayNumericImpl>({data_type_}, {}) + , data_type(this->argument_types[0]), max_elems(max_elems_) {} String getName() const override { return "groupArray"; } @@ -249,12 +249,13 @@ class GroupArrayGeneralListImpl final static Data & data(AggregateDataPtr place) { return *reinterpret_cast(place); } static const Data & data(ConstAggregateDataPtr place) { return *reinterpret_cast(place); } - DataTypePtr data_type; + DataTypePtr & data_type; UInt64 max_elems; public: GroupArrayGeneralListImpl(const DataTypePtr & data_type, UInt64 max_elems_ = std::numeric_limits::max()) - : data_type(data_type), max_elems(max_elems_) {} + : IAggregateFunctionDataHelper, GroupArrayGeneralListImpl>({data_type}, {}) + , data_type(this->argument_types[0]), max_elems(max_elems_) {} String getName() const override { return "groupArray"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index f80a45afaa9..7a99709c33e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -48,7 +48,7 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray(const std::string & n assertNoParameters(name, parameters); assertUnary(name, argument_types); - AggregateFunctionPtr res(createWithNumericType(*argument_types[0])); + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types[0])); if (!res) res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0])); diff --git 
a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index c0ef1fe0fa8..f2ae9e77438 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -129,7 +129,7 @@ class AggreagteFunctionGroupUniqArrayGeneric public: AggreagteFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type) : IAggregateFunctionDataHelper>({input_data_type}, {}) - , input_data_type(argument_types[0]) {} + , input_data_type(this->argument_types[0]) {} String getName() const override { return "groupUniqArray"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h index 60385f4788a..2e2c979f1d0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -304,7 +304,7 @@ private: const UInt32 max_bins; public: - AggregateFunctionHistogram(const DataTypes & arguments, const Array & params, UInt32 max_bins) + AggregateFunctionHistogram(UInt32 max_bins, const DataTypes & arguments, const Array & params) : IAggregateFunctionDataHelper>(arguments, params) , max_bins(max_bins) { diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 426ee8ee479..de661f81115 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -681,7 +681,7 @@ private: public: AggregateFunctionsSingleValue(const DataTypePtr & type) : IAggregateFunctionDataHelper>({type}, {}) - , type(argument_types[0]) + , type(this->argument_types[0]) { if (StringRef(Data::name()) == StringRef("min") || StringRef(Data::name()) == StringRef("max")) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h index a87f520d395..399b7f993d0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -78,7 +78,7 @@ private: public: AggregateFunctionQuantile(const DataTypePtr & argument_type, const Array & params) : IAggregateFunctionDataHelper>({argument_type}, params) - , levels(params, returns_many), level(levels.levels[0]), argument_type(argument_types[0]) + , levels(params, returns_many), level(levels.levels[0]), argument_type(this->argument_types[0]) { if (!returns_many && levels.size() > 1) throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 5a10ae62324..3c94b0c3705 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -82,7 +82,7 @@ AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, con AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types, arguments)); if (!res) - res.reset(createWithDecimalType(*keys_type, keys_type, values_types)); + res.reset(createWithDecimalType(*keys_type, keys_type, values_types, arguments)); if (!res) throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -105,7 +105,7 @@ AggregateFunctionPtr 
createAggregateFunctionSumMapFiltered(const std::string & n AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types, keys_to_keep, arguments, params)); if (!res) - res.reset(createWithDecimalType(*keys_type, keys_type, values_types, keys_to_keep)); + res.reset(createWithDecimalType(*keys_type, keys_type, values_types, keys_to_keep, arguments, params)); if (!res) throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 846a3e2b2a1..340b5f14e5a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -145,7 +145,7 @@ public: AggregateFunctionTopKGeneric( UInt64 threshold, const DataTypePtr & input_data_type, const Array & params) : IAggregateFunctionDataHelper>({input_data_type}, params) - , threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(argument_types[0]) {} + , threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(this->argument_types[0]) {} String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp index eaf021d8735..86456af0f9e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -99,7 +99,7 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const else if (which.isDateTime()) return std::make_shared>>(argument_types); else if (which.isStringOrFixedString()) - return std::make_shared>>(); + return std::make_shared>>(argument_types); else if (which.isUUID()) return std::make_shared>>(argument_types); else if (which.isTuple()) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index 56a855aabb9..aea227a5d69 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -260,7 +260,7 @@ private: public: AggregateFunctionUniqVariadic(const DataTypes & arguments) - : IAggregateFunctionDataHelper>(arguments) + : IAggregateFunctionDataHelper>(arguments, {}) { if (argument_is_tuple) num_args = typeid_cast(*arguments[0]).getElements().size(); From a4441bfba245eeb7a1a54309b191591144be5a1e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 13:09:03 +0300 Subject: [PATCH 24/79] Fix tests. --- dbms/src/Columns/ColumnAggregateFunction.cpp | 7 ++++++- dbms/src/DataTypes/DataTypeAggregateFunction.cpp | 5 +++-- dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 23abee39530..176b685548c 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -349,7 +349,12 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar void ColumnAggregateFunction::insert(const Field & x) { String type_string = getTypeString(); - auto & field_name = x.get().name; + + if (x.getType() != Field::Types::AggregateFunctionState) + throw Exception(String("Inserting field of type ") + x.getTypeName() + " into ColumnAggregateFunction. 
" + "Expected " + Field::Types::toString(Field::Types::AggregateFunctionState), ErrorCodes::LOGICAL_ERROR); + + auto & field_name = x.get().name; if (type_string != field_name) throw Exception("Cannot insert filed with type " + field_name + " into column with type " + type_string, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 33c6ba6a84f..04a06eb98f2 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -264,7 +264,8 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const /// Create empty state Field DataTypeAggregateFunction::getDefault() const { - Field field = String(); + Field field = AggregateFunctionStateData(); + field.get().name = getName(); AlignedBuffer place_buffer(function->sizeOfData(), function->alignOfData()); AggregateDataPtr place = place_buffer.data(); @@ -273,7 +274,7 @@ Field DataTypeAggregateFunction::getDefault() const try { - WriteBufferFromString buffer_from_field(field.get()); + WriteBufferFromString buffer_from_field(field.get().data); function->serialize(place, buffer_from_field); } catch (...) diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql index 5ebb524dc53..35febd5a75b 100644 --- a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -5,4 +5,5 @@ SELECT toUInt64((SELECT 9)) IN (SELECT number FROM system.numbers LIMIT 10); SELECT (SELECT toDate('2015-01-02')) = toDate('2015-01-02'), 'Hello' = (SELECT 'Hello'); SELECT (SELECT toDate('2015-01-02'), 'Hello'); SELECT (SELECT toDate('2015-01-02'), 'Hello') AS x, x, identity((SELECT 1)), identity((SELECT 1) AS y); -SELECT (SELECT uniqState('')); -- { serverError 125 } +-- SELECT (SELECT uniqState('')); + From 558ebbcc31c3b4cf7fb7220c184f3510001ee22e Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 12 Feb 2019 14:17:46 +0300 Subject: [PATCH 25/79] New library clickhouse_storage_kafka --- dbms/CMakeLists.txt | 9 +-------- dbms/src/Storages/CMakeLists.txt | 9 +++++---- dbms/src/Storages/Kafka/CMakeLists.txt | 9 +++++++++ 3 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 dbms/src/Storages/Kafka/CMakeLists.txt diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 4f31b16cbb0..65a99cede0d 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -102,9 +102,6 @@ add_headers_and_sources(dbms src/Interpreters/ClusterProxy) add_headers_and_sources(dbms src/Columns) add_headers_and_sources(dbms src/Storages) add_headers_and_sources(dbms src/Storages/Distributed) -if(USE_RDKAFKA) - add_headers_and_sources(dbms src/Storages/Kafka) -endif() add_headers_and_sources(dbms src/Storages/MergeTree) add_headers_and_sources(dbms src/Client) add_headers_and_sources(dbms src/Formats) @@ -297,11 +294,7 @@ if (USE_CAPNP) endif () if (USE_RDKAFKA) - target_link_libraries (dbms PRIVATE ${RDKAFKA_LIBRARY}) - target_link_libraries (dbms PRIVATE ${CPPKAFKA_LIBRARY}) - if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) - target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) - endif () + target_link_libraries (dbms PRIVATE clickhouse_storage_kafka) endif () target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads) diff --git a/dbms/src/Storages/CMakeLists.txt b/dbms/src/Storages/CMakeLists.txt index 617e866a012..236d4d32524 100644 --- 
a/dbms/src/Storages/CMakeLists.txt +++ b/dbms/src/Storages/CMakeLists.txt @@ -1,5 +1,6 @@ -add_subdirectory (System) +add_subdirectory(System) +add_subdirectory(Kafka) -if (ENABLE_TESTS) - add_subdirectory (tests) -endif () +if(ENABLE_TESTS) + add_subdirectory(tests) +endif() diff --git a/dbms/src/Storages/Kafka/CMakeLists.txt b/dbms/src/Storages/Kafka/CMakeLists.txt new file mode 100644 index 00000000000..c764a1b7fd5 --- /dev/null +++ b/dbms/src/Storages/Kafka/CMakeLists.txt @@ -0,0 +1,9 @@ +if(USE_RDKAFKA) + include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) + add_headers_and_sources(clickhouse_storage_kafka .) + add_library(clickhouse_storage_kafka ${LINK_MODE} ${clickhouse_storage_kafka_sources}) + target_link_libraries(clickhouse_storage_kafka PRIVATE clickhouse_common_io ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}) + if(NOT USE_INTERNAL_RDKAFKA_LIBRARY) + target_include_directories(clickhouse_storage_kafka SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) + endif() +endif() From 3218f5003f132a4124746577ff7fb3cea9f4c654 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 12 Feb 2019 15:11:45 +0300 Subject: [PATCH 26/79] fix full/right join different number of columns --- dbms/src/Interpreters/Join.cpp | 3 ++- .../00819_full_join_wrong_columns_in_block.reference | 8 ++++++++ .../00819_full_join_wrong_columns_in_block.sql | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference create mode 100644 dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9d5c19fe565..e1a30c5778d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -1207,7 +1207,8 @@ private: for (size_t i = 0; i < right_sample_block.columns(); ++i) { const ColumnWithTypeAndName & src_column = right_sample_block.getByPosition(i); - result_sample_block.insert(src_column.cloneEmpty()); + if (!result_sample_block.has(src_column.name)) + result_sample_block.insert(src_column.cloneEmpty()); } const auto & key_names_right = parent.key_names_right; diff --git a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference new file mode 100644 index 00000000000..074ee47e294 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.reference @@ -0,0 +1,8 @@ +1 x x +1 x x +1 x x +1 x x +1 x x +1 x x +1 x x +1 x x diff --git a/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql new file mode 100644 index 00000000000..ba746b62c09 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00819_full_join_wrong_columns_in_block.sql @@ -0,0 +1,9 @@ +SELECT * FROM (SELECT 1 AS a, 'x' AS b) join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) left join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) full join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) right join (SELECT 1 as a, 'y' as b) using a; + +SELECT * FROM (SELECT 1 AS a, 'x' AS b) any join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) any left join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) any full join (SELECT 1 as a, 'y' as b) using a; +SELECT * FROM (SELECT 1 AS a, 'x' AS b) 
any right join (SELECT 1 as a, 'y' as b) using a; From bdefba6c329b58caa014c0fb8de7e44ebe3ccb72 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 12 Feb 2019 15:59:40 +0300 Subject: [PATCH 27/79] fix link order --- dbms/src/Storages/Kafka/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/Kafka/CMakeLists.txt b/dbms/src/Storages/Kafka/CMakeLists.txt index c764a1b7fd5..520f1311d1d 100644 --- a/dbms/src/Storages/Kafka/CMakeLists.txt +++ b/dbms/src/Storages/Kafka/CMakeLists.txt @@ -2,7 +2,7 @@ if(USE_RDKAFKA) include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_storage_kafka .) add_library(clickhouse_storage_kafka ${LINK_MODE} ${clickhouse_storage_kafka_sources}) - target_link_libraries(clickhouse_storage_kafka PRIVATE clickhouse_common_io ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}) + target_link_libraries(clickhouse_storage_kafka PRIVATE clickhouse_common_io ${CPPKAFKA_LIBRARY} ${RDKAFKA_LIBRARY}) if(NOT USE_INTERNAL_RDKAFKA_LIBRARY) target_include_directories(clickhouse_storage_kafka SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) endif() From 5eb5f631fd6b8a9b3bf10bdef923c5894d984774 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Feb 2019 16:01:14 +0300 Subject: [PATCH 28/79] Update BrotliReadBuffer.h --- dbms/src/IO/BrotliReadBuffer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/IO/BrotliReadBuffer.h b/dbms/src/IO/BrotliReadBuffer.h index e0e9bc6160e..d6f2b7712b3 100644 --- a/dbms/src/IO/BrotliReadBuffer.h +++ b/dbms/src/IO/BrotliReadBuffer.h @@ -11,9 +11,9 @@ class BrotliReadBuffer : public BufferWithOwnMemory { public: BrotliReadBuffer( - ReadBuffer &in_, + ReadBuffer & in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - char *existing_memory = nullptr, + char * existing_memory = nullptr, size_t alignment = 0); ~BrotliReadBuffer() override; @@ -21,7 +21,7 @@ public: private: bool nextImpl() override; - ReadBuffer ∈ + ReadBuffer & in; class BrotliStateWrapper; std::unique_ptr brotli; From f86432dd92df80f5236c79e2fa62e895d1eb4c4f Mon Sep 17 00:00:00 2001 From: Mikhail Date: Tue, 12 Feb 2019 16:56:43 +0300 Subject: [PATCH 29/79] Update .gitmodules --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index a4a570d20d8..6597ac2e5ed 100644 --- a/.gitmodules +++ b/.gitmodules @@ -63,7 +63,7 @@ url = https://github.com/ClickHouse-Extras/libgsasl.git [submodule "contrib/cppkafka"] path = contrib/cppkafka - url = https://github.com/mfontanini/cppkafka.git + url = https://github.com/ClickHouse-Extras/cppkafka.git [submodule "contrib/pdqsort"] path = contrib/pdqsort url = https://github.com/orlp/pdqsort From edefa194201735e14ed26980c1e5fcb7a23c0b31 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 12 Feb 2019 17:08:05 +0300 Subject: [PATCH 30/79] Move odbc-brigde to clickhouse-common-static --- debian/clickhouse-common-static.install | 1 + debian/clickhouse-server.install | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/clickhouse-common-static.install b/debian/clickhouse-common-static.install index a2f5b69c964..6666b090272 100644 --- a/debian/clickhouse-common-static.install +++ b/debian/clickhouse-common-static.install @@ -1,3 +1,4 @@ usr/bin/clickhouse +usr/bin/clickhouse-odbc-bridge etc/security/limits.d/clickhouse.conf usr/share/clickhouse/* diff --git a/debian/clickhouse-server.install b/debian/clickhouse-server.install index bc3902ed41a..f69969a6084 100644 --- 
a/debian/clickhouse-server.install +++ b/debian/clickhouse-server.install @@ -2,7 +2,6 @@ usr/bin/clickhouse-server usr/bin/clickhouse-clang usr/bin/clickhouse-lld usr/bin/clickhouse-copier -usr/bin/clickhouse-odbc-bridge usr/bin/clickhouse-report etc/clickhouse-server/config.xml etc/clickhouse-server/users.xml From 83d461975e4db37a5e4c4b2ddc28121dfc674dd3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 12 Feb 2019 17:18:00 +0300 Subject: [PATCH 31/79] Function toStartOfDay() now can receive a date. --- dbms/src/Functions/DateTimeTransforms.h | 4 +- .../FunctionDateOrDateTimeToSomething.h | 28 +++++--- dbms/src/Functions/toStartOfInterval.cpp | 34 ++++++--- .../0_stateless/00189_time_zones.reference | 59 ++++++++++++++++ .../queries/0_stateless/00189_time_zones.sql | 69 ++++++++++++++++++- 5 files changed, 171 insertions(+), 23 deletions(-) diff --git a/dbms/src/Functions/DateTimeTransforms.h b/dbms/src/Functions/DateTimeTransforms.h index 5e3b540533c..6890b513602 100644 --- a/dbms/src/Functions/DateTimeTransforms.h +++ b/dbms/src/Functions/DateTimeTransforms.h @@ -65,9 +65,9 @@ struct ToStartOfDayImpl { return time_zone.toDate(t); } - static inline UInt32 execute(UInt16, const DateLUTImpl &) + static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return dateIsNotSupported(name); + return time_zone.toDate(DayNum(d)); } using FactorTransform = ZeroTransform; diff --git a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h index 19baf8569a2..bb32230a5b1 100644 --- a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -37,23 +37,33 @@ public: if (arguments.size() == 1) { if (!isDateOrDateTime(arguments[0].type)) - throw Exception("Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + - ". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + + ". Should be a date or a date with time", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } else if (arguments.size() == 2) { - if (!WhichDataType(arguments[0].type).isDateTime() - || !WhichDataType(arguments[1].type).isString()) + if (!isDateOrDateTime(arguments[0].type)) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + + ". Should be a date or a date with time", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!isString(arguments[1].type)) throw Exception( "Function " + getName() + " supports 1 or 2 arguments. The 1st argument " - "must be of type Date or DateTime. The 2nd argument (optional) must be " - "a constant string with timezone name. The timezone argument is allowed " - "only when the 1st argument has the type DateTime", + "must be of type Date or DateTime. 
The 2nd argument (optional) must be " + "a constant string with timezone name", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (isDate(arguments[0].type) && std::is_same_v) + throw Exception( + "The timezone argument of function " + getName() + " is allowed only when the 1st argument has the type DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } else - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1 or 2", + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + + ", should be 1 or 2", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// For DateTime, if time zone is specified, attach it to type. diff --git a/dbms/src/Functions/toStartOfInterval.cpp b/dbms/src/Functions/toStartOfInterval.cpp index bae34568811..21e500602e1 100644 --- a/dbms/src/Functions/toStartOfInterval.cpp +++ b/dbms/src/Functions/toStartOfInterval.cpp @@ -142,41 +142,54 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - auto check_date_time_argument = [&] { + bool first_argument_is_date = false; + auto check_first_argument = [&] + { if (!isDateOrDateTime(arguments[0].type)) throw Exception( "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + ". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + first_argument_is_date = isDate(arguments[0].type); }; const DataTypeInterval * interval_type = nullptr; - auto check_interval_argument = [&] { + bool result_type_is_date = false; + auto check_interval_argument = [&] + { interval_type = checkAndGetDataType(arguments[1].type.get()); if (!interval_type) throw Exception( "Illegal type " + arguments[1].type->getName() + " of argument of function " + getName() + ". Should be an interval of time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + result_type_is_date = (interval_type->getKind() == DataTypeInterval::Year) + || (interval_type->getKind() == DataTypeInterval::Quarter) || (interval_type->getKind() == DataTypeInterval::Month) + || (interval_type->getKind() == DataTypeInterval::Week); }; - auto check_timezone_argument = [&] { + auto check_timezone_argument = [&] + { if (!WhichDataType(arguments[2].type).isString()) throw Exception( "Illegal type " + arguments[2].type->getName() + " of argument of function " + getName() - + ". This argument is optional and must be a constant string with timezone name" - ". This argument is allowed only when the 1st argument has the type DateTime", + + ". 
This argument is optional and must be a constant string with timezone name", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (first_argument_is_date && result_type_is_date) + throw Exception( + "The timezone argument of function " + getName() + " with interval type " + interval_type->kindToString() + + " is allowed only when the 1st argument has the type DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); }; if (arguments.size() == 2) { - check_date_time_argument(); + check_first_argument(); check_interval_argument(); } else if (arguments.size() == 3) { - check_date_time_argument(); + check_first_argument(); check_interval_argument(); check_timezone_argument(); } @@ -188,11 +201,10 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - if ((interval_type->getKind() == DataTypeInterval::Second) || (interval_type->getKind() == DataTypeInterval::Minute) - || (interval_type->getKind() == DataTypeInterval::Hour) || (interval_type->getKind() == DataTypeInterval::Day)) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - else + if (result_type_is_date) return std::make_shared(); + else + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/dbms/tests/queries/0_stateless/00189_time_zones.reference b/dbms/tests/queries/0_stateless/00189_time_zones.reference index 5de17b8bb37..8e0abb19516 100644 --- a/dbms/tests/queries/0_stateless/00189_time_zones.reference +++ b/dbms/tests/queries/0_stateless/00189_time_zones.reference @@ -1,88 +1,137 @@ +toStartOfDay +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-10-01 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +2014-09-30 00:00:00 +toMonday 2014-12-29 2014-12-22 2014-12-22 2014-12-29 2014-12-22 +2014-12-29 +2014-12-29 +2014-12-29 +2014-12-29 +2014-12-29 +toStartOfMonth 2014-12-01 2014-12-01 2014-12-01 2014-12-01 2014-12-01 +2014-12-01 +2014-12-01 +2014-12-01 +2014-12-01 +2014-12-01 +toStartOfQuarter 2014-07-01 2014-07-01 2014-07-01 2014-10-01 2014-07-01 +2014-07-01 +2014-07-01 +2014-07-01 +2014-07-01 +2014-07-01 +toStartOfYear 2014-01-01 2014-01-01 2014-01-01 2014-01-01 2014-01-01 +2014-01-01 +2014-01-01 +2014-01-01 +2014-01-01 +2014-01-01 +toTime 1970-01-02 12:00:00 1970-01-02 12:00:00 1970-01-02 10:00:00 1970-01-02 11:00:00 1970-01-02 09:00:00 1970-01-02 10:00:00 1970-01-02 18:00:00 1970-01-02 18:00:00 1970-01-02 01:00:00 1970-01-02 01:00:00 +toYear 2014 2014 2014 2014 2014 +toMonth 9 9 9 10 9 +toDayOfMonth 30 30 30 1 30 +toDayOfWeek 2 2 2 3 2 +toHour 23 21 20 4 11 +toMinute 50 50 50 50 50 +toSecond 0 0 0 0 0 +toStartOfMinute 2019-02-06 22:57:00 2019-02-06 20:57:00 2019-02-06 19:57:00 2019-02-07 04:57:00 2019-02-06 11:57:00 +toStartOfFiveMinute 2019-02-06 22:55:00 2019-02-06 20:55:00 2019-02-06 19:55:00 2019-02-07 04:55:00 2019-02-06 11:55:00 +toStartOfTenMinutes 2019-02-06 22:50:00 2019-02-06 20:50:00 2019-02-06 19:50:00 2019-02-07 04:50:00 2019-02-06 11:50:00 +toStartOfFifteenMinutes 2019-02-06 22:45:00 2019-02-06 20:45:00 2019-02-06 19:45:00 2019-02-07 04:45:00 2019-02-06 11:45:00 +toStartOfHour 2019-02-06 22:00:00 2019-02-06 20:00:00 2019-02-06 19:00:00 2019-02-07 04:00:00 2019-02-06 11:00:00 +toStartOfInterval 2019-01-01 2018-01-01 2015-01-01 @@ -125,40 +174,48 @@ 2019-02-06 00:00:00 2019-02-05 00:00:00 2019-02-03 00:00:00 +toRelativeYearNum 44 44 44 44 44 +toRelativeMonthNum 536 536 536 537 536 
+toRelativeWeekNum 2335 2335 2335 2335 2335 +toRelativeDayNum 16343 16343 16343 16344 16343 +toRelativeHourNum 392251 392251 392251 392251 +toRelativeMinuteNum 23535110 23535110 23535110 23535110 23535110 +toRelativeSecondNum 1412106600 1412106600 1412106600 1412106600 1412106600 +toDate 2014-09-30 2014-09-30 2014-09-30 @@ -169,11 +226,13 @@ 2014-09-30 2014-10-01 2014-09-30 +toString 2015-07-15 13:30:00 2015-07-15 12:30:00 2015-07-15 11:30:00 2015-07-15 19:30:00 2015-07-15 02:30:00 +toUnixTimestamp 1426415400 1426422600 1426426200 diff --git a/dbms/tests/queries/0_stateless/00189_time_zones.sql b/dbms/tests/queries/0_stateless/00189_time_zones.sql index 41936a5c145..45d8dc8e7fc 100644 --- a/dbms/tests/queries/0_stateless/00189_time_zones.sql +++ b/dbms/tests/queries/0_stateless/00189_time_zones.sql @@ -5,41 +5,84 @@ /* timestamp 1428310800 == 2015-04-06 12:00:00 (Europe/Moscow) */ /* timestamp 1436956200 == 2015-07-15 13:30:00 (Europe/Moscow) */ /* timestamp 1426415400 == 2015-03-15 13:30:00 (Europe/Moscow) */ +/* timestamp 1549483055 == 2019-02-06 22:57:35 (Europe/Moscow) */ +/* date 16343 == 2014-09-30 */ +/* date 16433 == 2014-12-29 */ +/* date 17933 == 2019-02-06 */ + +/* toStartOfDay */ + +SELECT 'toStartOfDay'; +SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Moscow'); +SELECT toStartOfDay(toDateTime(1412106600), 'Europe/Paris'); +SELECT toStartOfDay(toDateTime(1412106600), 'Europe/London'); +SELECT toStartOfDay(toDateTime(1412106600), 'Asia/Tokyo'); +SELECT toStartOfDay(toDateTime(1412106600), 'Pacific/Pitcairn'); +SELECT toStartOfDay(toDate(16343), 'Europe/Moscow'); +SELECT toStartOfDay(toDate(16343), 'Europe/Paris'); +SELECT toStartOfDay(toDate(16343), 'Europe/London'); +SELECT toStartOfDay(toDate(16343), 'Asia/Tokyo'); +SELECT toStartOfDay(toDate(16343), 'Pacific/Pitcairn'); /* toMonday */ +SELECT 'toMonday'; SELECT toMonday(toDateTime(1419800400), 'Europe/Moscow'); SELECT toMonday(toDateTime(1419800400), 'Europe/Paris'); SELECT toMonday(toDateTime(1419800400), 'Europe/London'); SELECT toMonday(toDateTime(1419800400), 'Asia/Tokyo'); SELECT toMonday(toDateTime(1419800400), 'Pacific/Pitcairn'); +SELECT toMonday(toDate(16433)); +SELECT toMonday(toDate(16433)); +SELECT toMonday(toDate(16433)); +SELECT toMonday(toDate(16433)); +SELECT toMonday(toDate(16433)); /* toStartOfMonth */ +SELECT 'toStartOfMonth'; SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Moscow'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfMonth(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfMonth(toDateTime(1419800400), 'Asia/Tokyo'); SELECT toStartOfMonth(toDateTime(1419800400), 'Pacific/Pitcairn'); +SELECT toStartOfMonth(toDate(16433)); +SELECT toStartOfMonth(toDate(16433)); +SELECT toStartOfMonth(toDate(16433)); +SELECT toStartOfMonth(toDate(16433)); +SELECT toStartOfMonth(toDate(16433)); /* toStartOfQuarter */ +SELECT 'toStartOfQuarter'; SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Moscow'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/Paris'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Europe/London'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Asia/Tokyo'); SELECT toStartOfQuarter(toDateTime(1412106600), 'Pacific/Pitcairn'); +SELECT toStartOfQuarter(toDate(16343)); +SELECT toStartOfQuarter(toDate(16343)); +SELECT toStartOfQuarter(toDate(16343)); +SELECT toStartOfQuarter(toDate(16343)); +SELECT toStartOfQuarter(toDate(16343)); /* toStartOfYear */ +SELECT 'toStartOfYear'; SELECT toStartOfYear(toDateTime(1419800400), 
'Europe/Moscow'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/Paris'); SELECT toStartOfYear(toDateTime(1419800400), 'Europe/London'); SELECT toStartOfYear(toDateTime(1419800400), 'Asia/Tokyo'); SELECT toStartOfYear(toDateTime(1419800400), 'Pacific/Pitcairn'); +SELECT toStartOfYear(toDate(16433)); +SELECT toStartOfYear(toDate(16433)); +SELECT toStartOfYear(toDate(16433)); +SELECT toStartOfYear(toDate(16433)); +SELECT toStartOfYear(toDate(16433)); /* toTime */ +SELECT 'toTime'; SELECT toString(toTime(toDateTime(1420102800), 'Europe/Moscow'), 'Europe/Moscow'), toString(toTime(toDateTime(1428310800), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/Paris'), 'Europe/Paris'), toString(toTime(toDateTime(1428310800), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toTime(toDateTime(1420102800), 'Europe/London'), 'Europe/London'), toString(toTime(toDateTime(1428310800), 'Europe/London'), 'Europe/London'); @@ -48,6 +91,7 @@ SELECT toString(toTime(toDateTime(1420102800), 'Pacific/Pitcairn'), 'Pacific/Pit /* toYear */ +SELECT 'toYear'; SELECT toYear(toDateTime(1412106600), 'Europe/Moscow'); SELECT toYear(toDateTime(1412106600), 'Europe/Paris'); SELECT toYear(toDateTime(1412106600), 'Europe/London'); @@ -56,6 +100,7 @@ SELECT toYear(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMonth */ +SELECT 'toMonth'; SELECT toMonth(toDateTime(1412106600), 'Europe/Moscow'); SELECT toMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toMonth(toDateTime(1412106600), 'Europe/London'); @@ -64,6 +109,7 @@ SELECT toMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfMonth */ +SELECT 'toDayOfMonth'; SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Moscow'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfMonth(toDateTime(1412106600), 'Europe/London'); @@ -72,6 +118,7 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ +SELECT 'toDayOfWeek'; SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Moscow'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); @@ -80,6 +127,7 @@ SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toHour */ +SELECT 'toHour'; SELECT toHour(toDateTime(1412106600), 'Europe/Moscow'); SELECT toHour(toDateTime(1412106600), 'Europe/Paris'); SELECT toHour(toDateTime(1412106600), 'Europe/London'); @@ -88,6 +136,7 @@ SELECT toHour(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toMinute */ +SELECT 'toMinute'; SELECT toMinute(toDateTime(1412106600), 'Europe/Moscow'); SELECT toMinute(toDateTime(1412106600), 'Europe/Paris'); SELECT toMinute(toDateTime(1412106600), 'Europe/London'); @@ -96,6 +145,7 @@ SELECT toMinute(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toSecond */ +SELECT 'toSecond'; SELECT toSecond(toDateTime(1412106600), 'Europe/Moscow'); SELECT toSecond(toDateTime(1412106600), 'Europe/Paris'); SELECT toSecond(toDateTime(1412106600), 'Europe/London'); @@ -104,6 +154,7 @@ SELECT toSecond(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toStartOfMinute */ +SELECT 'toStartOfMinute'; SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); @@ -112,6 +163,7 @@ SELECT toString(toStartOfMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pa /* toStartOfFiveMinute */ +SELECT 
'toStartOfFiveMinute'; SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); @@ -120,14 +172,16 @@ SELECT toString(toStartOfFiveMinute(toDateTime(1549483055), 'Pacific/Pitcairn'), /* toStartOfTenMinutes */ +SELECT 'toStartOfTenMinutes'; SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo'); SELECT toString(toStartOfTenMinutes(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pacific/Pitcairn'); -/* toStartOfTenMinutes */ +/* toStartOfFifteenMinutes */ +SELECT 'toStartOfFifteenMinutes'; SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); @@ -136,6 +190,7 @@ SELECT toString(toStartOfFifteenMinutes(toDateTime(1549483055), 'Pacific/Pitcair /* toStartOfHour */ +SELECT 'toStartOfHour'; SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/Paris'), 'Europe/Paris'); SELECT toString(toStartOfHour(toDateTime(1549483055), 'Europe/London'), 'Europe/London'); @@ -143,6 +198,8 @@ SELECT toString(toStartOfHour(toDateTime(1549483055), 'Asia/Tokyo'), 'Asia/Tokyo SELECT toString(toStartOfHour(toDateTime(1549483055), 'Pacific/Pitcairn'), 'Pacific/Pitcairn'); /* toStartOfInterval */ + +SELECT 'toStartOfInterval'; SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 1 year, 'Europe/Moscow'); SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 2 year, 'Europe/Moscow'); SELECT toStartOfInterval(toDateTime(1549483055), INTERVAL 5 year, 'Europe/Moscow'); @@ -188,6 +245,7 @@ SELECT toString(toStartOfInterval(toDate(17933), INTERVAL 5 day, 'Europe/Moscow' /* toRelativeYearNum */ +SELECT 'toRelativeYearNum'; SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeYearNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeYearNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeYearNum(toDateTime(1412106600), 'Europe/London') - toRelativeYearNum(toDateTime(0), 'Europe/London'); @@ -196,6 +254,7 @@ SELECT toRelativeYearNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeMonthNum */ +SELECT 'toRelativeMonthNum'; SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMonthNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMonthNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMonthNum(toDateTime(1412106600), 'Europe/London') - toRelativeMonthNum(toDateTime(0), 'Europe/London'); @@ -204,6 +263,7 @@ SELECT toRelativeMonthNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelati /* toRelativeWeekNum */ +SELECT 'toRelativeWeekNum'; SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeWeekNum(toDateTime(0), 'Europe/Moscow'); 
SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeWeekNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeWeekNum(toDateTime(1412106600), 'Europe/London') - toRelativeWeekNum(toDateTime(0), 'Europe/London'); @@ -212,6 +272,7 @@ SELECT toRelativeWeekNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelativ /* toRelativeDayNum */ +SELECT 'toRelativeDayNum'; SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeDayNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeDayNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeDayNum(toDateTime(1412106600), 'Europe/London') - toRelativeDayNum(toDateTime(0), 'Europe/London'); @@ -220,6 +281,7 @@ SELECT toRelativeDayNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelative /* toRelativeHourNum */ +SELECT 'toRelativeHourNum'; SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeHourNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeHourNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeHourNum(toDateTime(1412106600), 'Europe/London') - toRelativeHourNum(toDateTime(0), 'Europe/London'); @@ -228,6 +290,7 @@ SELECT toRelativeHourNum(toDateTime(1412106600), 'Asia/Tokyo') - toRelativeHourN /* toRelativeMinuteNum */ +SELECT 'toRelativeMinuteNum'; SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeMinuteNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeMinuteNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Europe/London') - toRelativeMinuteNum(toDateTime(0), 'Europe/London'); @@ -236,6 +299,7 @@ SELECT toRelativeMinuteNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toRelativeSecondNum */ +SELECT 'toRelativeSecondNum'; SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/Moscow') - toRelativeSecondNum(toDateTime(0), 'Europe/Moscow'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/Paris') - toRelativeSecondNum(toDateTime(0), 'Europe/Paris'); SELECT toRelativeSecondNum(toDateTime(1412106600), 'Europe/London') - toRelativeSecondNum(toDateTime(0), 'Europe/London'); @@ -244,6 +308,7 @@ SELECT toRelativeSecondNum(toDateTime(1412106600), 'Pacific/Pitcairn') - toRelat /* toDate */ +SELECT 'toDate'; SELECT toDate(toDateTime(1412106600), 'Europe/Moscow'); SELECT toDate(toDateTime(1412106600), 'Europe/Paris'); SELECT toDate(toDateTime(1412106600), 'Europe/London'); @@ -258,6 +323,7 @@ SELECT toDate(1412106600, 'Pacific/Pitcairn'); /* toString */ +SELECT 'toString'; SELECT toString(toDateTime(1436956200), 'Europe/Moscow'); SELECT toString(toDateTime(1436956200), 'Europe/Paris'); SELECT toString(toDateTime(1436956200), 'Europe/London'); @@ -266,6 +332,7 @@ SELECT toString(toDateTime(1436956200), 'Pacific/Pitcairn'); /* toUnixTimestamp */ +SELECT 'toUnixTimestamp'; SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Moscow'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/Paris'); SELECT toUnixTimestamp(toString(toDateTime(1426415400), 'Europe/Moscow'), 'Europe/London'); From ec7fdcb8f16bf636d393a6fe3acb76dfe96bcc87 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Tue, 12 Feb 2019 17:26:51 +0300 Subject: [PATCH 32/79] Update .gitmodules --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 
6597ac2e5ed..cc89b4cca31 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,9 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/ClickHouse-Extras/cppkafka.git -[submodule "contrib/pdqsort"] - path = contrib/pdqsort - url = https://github.com/orlp/pdqsort [submodule "contrib/brotli"] path = contrib/brotli url = https://github.com/google/brotli.git From a73f29ca2e8941adc7acf8b6efbb06d03f7387ad Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 17:38:29 +0300 Subject: [PATCH 33/79] Fix LowCardinality cache. --- dbms/src/Common/ColumnsHashing.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 4f6708ae2f1..436ed43660e 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -364,7 +364,10 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod } if constexpr (has_mapped) + { + mapped_cache[row] = it->second; return EmplaceResult(it->second, mapped_cache[row], inserted); + } else return EmplaceResult(inserted); } From b95f5196c0f9fab7d4e076f41000c6610396e19f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 17:43:52 +0300 Subject: [PATCH 34/79] Added test. --- .../queries/0_stateless/906_low_cardinality_cache.reference | 1 + dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference create mode 100644 dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql diff --git a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference new file mode 100644 index 00000000000..5e2be7a023a --- /dev/null +++ b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference @@ -0,0 +1 @@ +100000000 0123456789 diff --git a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql new file mode 100644 index 00000000000..cafc559d77f --- /dev/null +++ b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql @@ -0,0 +1,5 @@ +drop table if exists test.lc; +create table test.lc (b LowCardinality(String)) engine=MergeTree order by b; +insert into test.lc select '0123456789' from numbers(100000000); +select count(), b from test.lc group by b; + From 93e0621a2ca64324c8c8262cf8726e36a473c755 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 12 Feb 2019 18:08:21 +0300 Subject: [PATCH 35/79] fix wrong query in push down test --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 5 +++-- dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp | 1 - dbms/src/Parsers/ASTIdentifier.cpp | 9 +++++++++ dbms/src/Parsers/ASTIdentifier.h | 6 +----- .../queries/0_stateless/00597_push_down_predicate.sql | 4 ++-- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 2c333040360..62f7f91f197 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1019,9 +1019,10 @@ void ExpressionAnalyzer::collectUsedColumns() for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) { - unknown_required_source_columns.erase(it->name); + const String & column_name = it->name; + unknown_required_source_columns.erase(column_name); - if (!required.count(it->name)) + if 
(!required.count(column_name)) source_columns.erase(it++); else ++it; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 930295a3b5a..85f8f09b867 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -250,7 +250,6 @@ void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate( name = ast->getAliasOrColumnName(); } - IdentifierSemantic::setNeedLongName(*identifier, false); identifier->setShortName(name); } } diff --git a/dbms/src/Parsers/ASTIdentifier.cpp b/dbms/src/Parsers/ASTIdentifier.cpp index e5500a89bd0..b57db5bef87 100644 --- a/dbms/src/Parsers/ASTIdentifier.cpp +++ b/dbms/src/Parsers/ASTIdentifier.cpp @@ -22,6 +22,15 @@ ASTIdentifier::ASTIdentifier(const String & name_, std::vector && name_p { } +void ASTIdentifier::setShortName(const String & new_name) +{ + name = new_name; + name_parts.clear(); + + semantic->need_long_name = false; + semantic->can_be_alias = true; +} + void ASTIdentifier::formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const { auto format_element = [&](const String & elem_name) diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 9457b7d9156..b875b7dc91a 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -36,11 +36,7 @@ public: bool compound() const { return !name_parts.empty(); } bool isShort() const { return name_parts.empty() || name == name_parts.back(); } - void setShortName(const String & new_name) - { - name = new_name; - name_parts.clear(); - } + void setShortName(const String & new_name); const String & shortName() const { diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 0180fcdeb1e..495c367e3bd 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -31,7 +31,7 @@ SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1; SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1; -SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; +SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; SELECT '-------Force push down-------'; SET force_primary_key = 1; @@ -72,7 +72,7 @@ SELECT '-------Push to having expression, need check.-------'; SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 } SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -- { serverError 277 } SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; -- { serverError 277 } -SELECT * FROM (SELECT toUInt64(table_alias.b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 } +SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 } 
SELECT '-------Compatibility test-------'; SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); From 1af4d9b89c0078576ae73147ab6b00d5bda43da7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 18:42:33 +0300 Subject: [PATCH 36/79] Fix type for constant LowCardinality arguments. --- dbms/src/Functions/IFunction.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index a3d34a830a9..2905893458b 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -354,7 +354,11 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( { ColumnWithTypeAndName & column = block.getByPosition(arg); if (auto * column_const = checkAndGetColumn(column.column.get())) + { column.column = column_const->removeLowCardinality()->cloneResized(num_rows); + if (auto * low_cardinality_type = checkAndGetDataType(column.type.get())) + column.type = removeLowCardinality() ///low_cardinality_type->getDictionaryType(); + } else if (auto * low_cardinality_column = checkAndGetColumn(column.column.get())) { auto * low_cardinality_type = checkAndGetDataType(column.type.get()); From bb686b3154920202841dc2de934c9891973f539e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 18:43:58 +0300 Subject: [PATCH 37/79] Fix type for constant LowCardinality arguments. --- dbms/src/Functions/IFunction.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 2905893458b..069a96f9ad8 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -356,8 +356,7 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (auto * column_const = checkAndGetColumn(column.column.get())) { column.column = column_const->removeLowCardinality()->cloneResized(num_rows); - if (auto * low_cardinality_type = checkAndGetDataType(column.type.get())) - column.type = removeLowCardinality() ///low_cardinality_type->getDictionaryType(); + column.type = removeLowCardinality(column.type); } else if (auto * low_cardinality_column = checkAndGetColumn(column.column.get())) { From 7673d2abb5c2385d7e0fbd7d046d40fa455f786a Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Tue, 12 Feb 2019 18:48:32 +0300 Subject: [PATCH 38/79] Update 906_low_cardinality_cache.sql Delete table after test. --- dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql index cafc559d77f..ca286e9379f 100644 --- a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql +++ b/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql @@ -2,4 +2,4 @@ drop table if exists test.lc; create table test.lc (b LowCardinality(String)) engine=MergeTree order by b; insert into test.lc select '0123456789' from numbers(100000000); select count(), b from test.lc group by b; - +drop table if exists test.lc; From 31397f715675b14acab4642a20fce77b6a1339ee Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 19:04:05 +0300 Subject: [PATCH 39/79] Fix column size for const LowCardinality arguments. 
--- dbms/src/Functions/IFunction.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 069a96f9ad8..85342fca358 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -332,9 +332,9 @@ static const ColumnLowCardinality * findLowCardinalityArgument(const Block & blo } static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( - Block & block, const ColumnNumbers & args, bool can_be_executed_on_default_arguments) + Block & block, const ColumnNumbers & args, bool can_be_executed_on_default_arguments, size_t input_rows_count) { - size_t num_rows = 0; + size_t num_rows = input_rows_count; ColumnPtr indexes; for (auto arg : args) @@ -426,7 +426,7 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si block_without_low_cardinality.safeGetByPosition(result).type = res_low_cardinality_type->getDictionaryType(); ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( - block_without_low_cardinality, args, can_be_executed_on_default_arguments); + block_without_low_cardinality, args, can_be_executed_on_default_arguments, input_rows_count); executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, block_without_low_cardinality.rows(), dry_run); From 8e075899cbbbc081f71b6f26a0613fbd4ab750ac Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 19:15:20 +0300 Subject: [PATCH 40/79] Added test. --- .../0_stateless/00906_low_cardinality_const_argument.reference | 1 + .../0_stateless/00906_low_cardinality_const_argument.sql | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.reference create mode 100644 dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.sql diff --git a/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.reference b/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.reference new file mode 100644 index 00000000000..78981922613 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.reference @@ -0,0 +1 @@ +a diff --git a/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.sql b/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.sql new file mode 100644 index 00000000000..831a4534f1e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00906_low_cardinality_const_argument.sql @@ -0,0 +1,2 @@ +select materialize(toLowCardinality('a')); + From 7d1e755a7c6b5b8903aed7e418e1c14f4bb37f3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Feb 2019 19:41:06 +0300 Subject: [PATCH 41/79] Fixed rare race condition on startup of non-replicated MergeTree tables: concurrent attempt to remove a temporary directory [#CLICKHOUSE-4296] --- dbms/src/Storages/MergeTree/BackgroundProcessingPool.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- dbms/src/Storages/StorageMergeTree.cpp | 6 ++++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h index b9c64aebfe9..ac7d231d966 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.h @@ -53,7 +53,9 @@ public: return size; } + /// The task is started immediately. 
TaskHandle addTask(const Task & task); + void removeTask(const TaskHandle & task); ~BackgroundProcessingPool(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 689e166c757..2203d15a00f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -831,7 +831,7 @@ void MergeTreeData::clearOldTemporaryDirectories(ssize_t custom_directories_life Poco::DirectoryIterator end; for (Poco::DirectoryIterator it{full_path}; it != end; ++it) { - if (startsWith(it.name(), "tmp")) + if (startsWith(it.name(), "tmp_")) { Poco::File tmp_dir(full_path + it.name()); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0f9f2069645..0582cc859cb 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -87,13 +87,15 @@ StorageMergeTree::StorageMergeTree( void StorageMergeTree::startup() { - background_task_handle = background_pool.addTask([this] { return backgroundTask(); }); - data.clearOldPartsFromFilesystem(); /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately data.clearOldTemporaryDirectories(0); + + /// NOTE background task will also do the above cleanups periodically. + time_after_previous_cleanup.restart(); + background_task_handle = background_pool.addTask([this] { return backgroundTask(); }); } From 5279c7ff81b638fecbf193c9901e0e530ba70240 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 12 Feb 2019 20:54:52 +0300 Subject: [PATCH 42/79] setting --- dbms/src/Interpreters/Settings.h | 1 + dbms/src/Storages/IndicesDescription.h | 1 + dbms/src/Storages/MergeTree/MergeTreeData.cpp | 8 +++++++- dbms/src/Storages/MergeTree/MergeTreeData.h | 2 +- dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp | 4 ++++ dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- dbms/tests/queries/0_stateless/00836_indices_alter.sql | 2 ++ .../00836_indices_alter_replicated_zookeeper.sql | 2 ++ dbms/tests/queries/0_stateless/00837_minmax_index.sh | 9 ++++++--- .../00837_minmax_index_replicated_zookeeper.sql | 2 ++ dbms/tests/queries/0_stateless/00838_unique_index.sh | 4 +++- 12 files changed, 31 insertions(+), 8 deletions(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 20b1e3cffaf..ef53f1300cb 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -300,6 +300,7 @@ struct Settings M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \ M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.") \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only for 'mysql' table function.") \ + M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.")\ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Storages/IndicesDescription.h b/dbms/src/Storages/IndicesDescription.h index 9d7c7907ca1..53b492198ce 100644 --- a/dbms/src/Storages/IndicesDescription.h +++ b/dbms/src/Storages/IndicesDescription.h @@ -14,6 +14,7 @@ struct IndicesDescription IndicesDescription() = default; + bool empty() const { return indices.empty(); } String toString() const; static IndicesDescription parse(const String & str); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 689e166c757..1df3f9f50a7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -72,6 +72,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int MEMORY_LIMIT_EXCEEDED; extern const int SYNTAX_ERROR; extern const int INVALID_PARTITION_VALUE; @@ -1051,7 +1052,7 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) } -void MergeTreeData::checkAlter(const AlterCommands & commands) +void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & context) { /// Check that needed transformations can be applied to the list of columns without considering type conversions. auto new_columns = getColumns(); @@ -1060,6 +1061,11 @@ void MergeTreeData::checkAlter(const AlterCommands & commands) ASTPtr new_primary_key_ast = primary_key_ast; commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast); + if (getIndicesDescription().empty() && !new_indices.empty() && + !context.getSettingsRef().allow_experimental_data_skipping_indices) + throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ + "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); + /// Set of columns that shouldn't be altered. NameSet columns_alter_forbidden; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 66cbe6bf9dd..f252e43b562 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -481,7 +481,7 @@ public: /// - all type conversions can be done. /// - columns corresponding to primary key, indices, sign, sampling expression and date are not affected. /// If something is wrong, throws an exception. - void checkAlter(const AlterCommands & commands); + void checkAlter(const AlterCommands & commands, const Context & context); /// Performs ALTER of the data part, writes the result to temporary files. /// Returns an object allowing to rename temporary files to permanent files. 
diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 6fd61fb434c..1958b489023 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -610,6 +610,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) ErrorCodes::BAD_ARGUMENTS); } + if (!args.attach && !indices_description.empty() && !args.local_context.getSettingsRef().allow_experimental_data_skipping_indices) + throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ + "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); + if (replicated) return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name, diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 0f9f2069645..7f4c844aa09 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -211,7 +211,7 @@ void StorageMergeTree::alter( auto table_soft_lock = lockDataForAlter(); - data.checkAlter(params); + data.checkAlter(params, context); auto new_columns = data.getColumns(); auto new_indices = data.getIndicesDescription(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 062800871e6..a1eaee63a72 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3125,7 +3125,7 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params, if (is_readonly) throw Exception("Can't ALTER readonly table", ErrorCodes::TABLE_IS_READ_ONLY); - data.checkAlter(params); + data.checkAlter(params, query_context); ColumnsDescription new_columns = data.getColumns(); IndicesDescription new_indices = data.getIndicesDescription(); diff --git a/dbms/tests/queries/0_stateless/00836_indices_alter.sql b/dbms/tests/queries/0_stateless/00836_indices_alter.sql index 6749d69ff28..0fb8d49da6a 100644 --- a/dbms/tests/queries/0_stateless/00836_indices_alter.sql +++ b/dbms/tests/queries/0_stateless/00836_indices_alter.sql @@ -1,6 +1,8 @@ DROP TABLE IF EXISTS test.minmax_idx; DROP TABLE IF EXISTS test.minmax_idx2; +SET allow_experimental_data_skipping_indices = 1; + CREATE TABLE test.minmax_idx ( u64 UInt64, diff --git a/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.sql index ccd71beb6ec..4240348f7de 100644 --- a/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.sql @@ -3,6 +3,8 @@ DROP TABLE IF EXISTS test.minmax_idx_r; DROP TABLE IF EXISTS test.minmax_idx2; DROP TABLE IF EXISTS test.minmax_idx2_r; +SET allow_experimental_data_skipping_indices = 1; + CREATE TABLE test.minmax_idx ( u64 UInt64, diff --git a/dbms/tests/queries/0_stateless/00837_minmax_index.sh b/dbms/tests/queries/0_stateless/00837_minmax_index.sh index 72c36be1bf1..d38f7bbabfd 100755 --- a/dbms/tests/queries/0_stateless/00837_minmax_index.sh +++ b/dbms/tests/queries/0_stateless/00837_minmax_index.sh @@ -3,9 +3,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.minmax_idx + +$CLICKHOUSE_CLIENT -n --query=" +SET allow_experimental_data_skipping_indices = 1; +CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, @@ -19,7 +22,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE test.minmax_idx INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 ) ENGINE = MergeTree() ORDER BY u64 -SETTINGS index_granularity = 2" +SETTINGS index_granularity = 2;" $CLICKHOUSE_CLIENT --query="INSERT INTO test.minmax_idx VALUES diff --git a/dbms/tests/queries/0_stateless/00837_minmax_index_replicated_zookeeper.sql b/dbms/tests/queries/0_stateless/00837_minmax_index_replicated_zookeeper.sql index 8d0d5ce80ff..ceb361170f3 100644 --- a/dbms/tests/queries/0_stateless/00837_minmax_index_replicated_zookeeper.sql +++ b/dbms/tests/queries/0_stateless/00837_minmax_index_replicated_zookeeper.sql @@ -1,6 +1,8 @@ DROP TABLE IF EXISTS test.minmax_idx1; DROP TABLE IF EXISTS test.minmax_idx2; +SET allow_experimental_data_skipping_indices = 1; + CREATE TABLE test.minmax_idx1 ( u64 UInt64, diff --git a/dbms/tests/queries/0_stateless/00838_unique_index.sh b/dbms/tests/queries/0_stateless/00838_unique_index.sh index 9f33c916689..dd4440bd5ce 100755 --- a/dbms/tests/queries/0_stateless/00838_unique_index.sh +++ b/dbms/tests/queries/0_stateless/00838_unique_index.sh @@ -5,7 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.set_idx;" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test.set_idx +$CLICKHOUSE_CLIENT -n --query=" +SET allow_experimental_data_skipping_indices = 1; +CREATE TABLE test.set_idx ( u64 UInt64, i32 Int32, From dddec4073ea0ccd8cf9a8a8549bc4d9bfff3f208 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 12 Feb 2019 20:56:57 +0300 Subject: [PATCH 43/79] fix --- docs/en/operations/table_engines/mergetree.md | 2 +- docs/ru/operations/table_engines/mergetree.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 35c7feb36b9..29647a76e33 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -228,7 +228,7 @@ To check whether ClickHouse can use the index when running a query, use the sett The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. -### Data Skipping Indices +### Data Skipping Indices (Experimental) Index declaration in the columns section of the `CREATE` query. 
```sql diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index aa288e45434..f27dacb3495 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -226,7 +226,7 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' Ключ партиционирования по месяцам обеспечивает чтение только тех блоков данных, которые содержат даты из нужного диапазона. При этом блок данных может содержать данные за многие даты (до целого месяца). В пределах одного блока данные упорядочены по первичному ключу, который может не содержать дату в качестве первого столбца. В связи с этим, при использовании запроса с указанием условия только на дату, но не на префикс первичного ключа, будет читаться данных больше, чем за одну дату. -### Дополнительные индексы +### Дополнительные индексы (Экспериментальный функционал) Для таблиц семейства `*MergeTree` можно задать дополнительные индексы в секции столбцов. From a27c61b0656294f4614f911398b72eb4fd472fdd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 12 Feb 2019 21:02:45 +0300 Subject: [PATCH 44/79] docs --- docs/en/operations/table_engines/mergetree.md | 2 ++ docs/ru/operations/table_engines/mergetree.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 29647a76e33..948d63ff7d8 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -230,6 +230,8 @@ The key for partitioning by month allows reading only those data blocks which co ### Data Skipping Indices (Experimental) +You need to set `allow_experimental_data_skipping_indices` to 1 to use indices. (run `SET allow_experimental_data_skipping_indices = 1`). + Index declaration in the columns section of the `CREATE` query. ```sql INDEX index_name expr TYPE type(...) GRANULARITY granularity_value diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index f27dacb3495..3c4f84d1c8c 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -228,6 +228,8 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ### Дополнительные индексы (Экспериментальный функционал) +Для использования требуется установить настройку `allow_experimental_data_skipping_indices` в 1. (запустить `SET allow_experimental_data_skipping_indices = 1`). + Для таблиц семейства `*MergeTree` можно задать дополнительные индексы в секции столбцов. Индексы аггрегируют для заданного выражения некоторые данные, а потом при `SELECT` запросе используют для пропуска боков данных (пропускаемый блок состоих из гранул данных в количестве равном гранулярности данного индекса), на которых секция `WHERE` не может быть выполнена, тем самым уменьшая объем данных читаемых с диска. From 0bf4f4334bc46b324fc4582e826e09d506ef1b3b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 12 Feb 2019 21:27:14 +0300 Subject: [PATCH 45/79] Renamed tests. 
--- ...lity_cache.reference => 00906_low_cardinality_cache.reference} | 0 ..._low_cardinality_cache.sql => 00906_low_cardinality_cache.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{906_low_cardinality_cache.reference => 00906_low_cardinality_cache.reference} (100%) rename dbms/tests/queries/0_stateless/{906_low_cardinality_cache.sql => 00906_low_cardinality_cache.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference b/dbms/tests/queries/0_stateless/00906_low_cardinality_cache.reference similarity index 100% rename from dbms/tests/queries/0_stateless/906_low_cardinality_cache.reference rename to dbms/tests/queries/0_stateless/00906_low_cardinality_cache.reference diff --git a/dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql b/dbms/tests/queries/0_stateless/00906_low_cardinality_cache.sql similarity index 100% rename from dbms/tests/queries/0_stateless/906_low_cardinality_cache.sql rename to dbms/tests/queries/0_stateless/00906_low_cardinality_cache.sql From 7665ef297fba68fdb7762197b8b05f580bf61d53 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Feb 2019 21:42:14 +0300 Subject: [PATCH 46/79] Auto version update to [19.3.1] [54415] --- dbms/cmake/version.cmake | 8 ++++---- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index f19b898939b..5f3758b89a6 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54415) set(VERSION_MAJOR 19) set(VERSION_MINOR 3) -set(VERSION_PATCH 0) -set(VERSION_GITHASH 1db4bd8c2a1a0cd610c8a6564e8194dca5265562) -set(VERSION_DESCRIBE v19.3.0-testing) -set(VERSION_STRING 19.3.0) +set(VERSION_PATCH 1) +set(VERSION_GITHASH 48280074c4a9151ca010fb0a777efd82634460bd) +set(VERSION_DESCRIBE v19.3.1-testing) +set(VERSION_STRING 19.3.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index f1e2c9cd754..8d988cb3cac 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.3.0) unstable; urgency=low +clickhouse (19.3.1) unstable; urgency=low * Modified source code - -- Mon, 11 Feb 2019 18:13:23 +0300 + -- Tue, 12 Feb 2019 21:42:14 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index e817274d1e5..2196a342ecd 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.0 +ARG version=19.3.1 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 93d19b3ee60..244c86f92a9 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.0 +ARG version=19.3.1 ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index ef7299c5b7c..e5cf7036165 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.0 +ARG version=19.3.1 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 
d558cf345ceaefcee84b32b3ce39ee8f1bdf4fc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Feb 2019 22:04:13 +0300 Subject: [PATCH 47/79] Faster cancelling of vertical merges --- dbms/src/DataStreams/MergingSortedBlockInputStream.h | 1 - .../Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.h b/dbms/src/DataStreams/MergingSortedBlockInputStream.h index 00a1011d78c..83c31dc51e2 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.h @@ -13,7 +13,6 @@ #include #include -#include namespace DB diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c6c7ead4c53..f5ee7fe1ee7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -762,11 +762,15 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor size_t column_elems_written = 0; column_to.writePrefix(); - while ((block = column_gathered_stream.read())) + while (!actions_blocker.isCancelled() && (block = column_gathered_stream.read())) { column_elems_written += block.rows(); column_to.write(block); } + + if (actions_blocker.isCancelled()) + throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); + column_gathered_stream.readSuffix(); checksums_gathered_columns.add(column_to.writeSuffixAndGetChecksums()); @@ -781,9 +785,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry->columns_written += 1; merge_entry->bytes_written_uncompressed += column_gathered_stream.getProfileInfo().bytes; merge_entry->progress.store(progress_before + column_sizes.columnWeight(column_name), std::memory_order_relaxed); - - if (actions_blocker.isCancelled()) - throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); } Poco::File(rows_sources_file_path).remove(); From 4051306fa7e68601ca87e3bd1c9287a8221b13d8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Feb 2019 22:09:51 +0300 Subject: [PATCH 48/79] Fixed includes #4372 --- dbms/src/DataStreams/CollapsingSortedBlockInputStream.h | 1 + dbms/src/DataStreams/MergingSortedBlockInputStream.cpp | 1 + dbms/src/DataStreams/ReplacingSortedBlockInputStream.h | 1 + dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h | 1 + 4 files changed, 4 insertions(+) diff --git a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h index bd1a33436e0..999e9b5e6dd 100644 --- a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp index a65a4195dfd..e59bbef4c61 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace DB diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h index fa1e9d4428c..06505d2fe2f 100644 --- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h @@ -3,6 +3,7 @@ #include #include 
+#include namespace DB diff --git a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h index 2222ba9d93b..b123079febc 100644 --- a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h @@ -3,6 +3,7 @@ #include #include +#include #include From 67e3bc501bbe2a56bd4f432840b5048425ca0090 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Feb 2019 22:19:12 +0300 Subject: [PATCH 49/79] Add brotli to stateless test image --- docker/test/stateless/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 9faf98bf066..34a64a24b1a 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -19,7 +19,9 @@ RUN apt-get update -y \ openssl \ netcat-openbsd \ telnet \ - moreutils + moreutils \ + brotli + ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone From c8cafa456c51a3c1a35360228ad9e066a25bf41d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Feb 2019 22:23:22 +0300 Subject: [PATCH 50/79] Added a test for already fixed issue [#CLICKHOUSE-4260] --- ...optimize_predicate_and_rename_table.reference | 2 ++ ...00843_optimize_predicate_and_rename_table.sql | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.reference create mode 100644 dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql diff --git a/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.reference b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql new file mode 100644 index 00000000000..324f9e7c5e0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test.test1; +DROP TABLE IF EXISTS test.test2; +DROP TABLE IF EXISTS test.view; + +CREATE TABLE test.test1 (a UInt8) ENGINE = Memory; +INSERT INTO test.test1 VALUES (1); + +CREATE VIEW test.view AS SELECT * FROM test.test1; +SELECT * FROM test.view; +RENAME TABLE test.test1 TO test.test2; +SELECT * FROM test.view; -- { serverError 60 } +RENAME TABLE test.test2 TO test.test1; +SELECT * FROM test.view; + +DROP TABLE test.test1; +DROP TABLE test.view; From 6a16d27da45aa54d45e45d420aa2315ce8a37d4c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 02:49:32 +0300 Subject: [PATCH 51/79] Fixed bad error message #2418 --- dbms/src/Interpreters/CatBoostModel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/CatBoostModel.cpp b/dbms/src/Interpreters/CatBoostModel.cpp index b9f6d9beaa0..eb27eb85580 100644 --- a/dbms/src/Interpreters/CatBoostModel.cpp +++ b/dbms/src/Interpreters/CatBoostModel.cpp @@ -132,7 +132,7 @@ public: std::string msg; { WriteBufferFromString buffer(msg); - buffer << "Column " << i << "should be numeric to make float feature."; + buffer << "Column " << i << " should be numeric to make 
float feature."; } throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); } @@ -150,7 +150,7 @@ public: std::string msg; { WriteBufferFromString buffer(msg); - buffer << "Column " << i << "should be numeric or string."; + buffer << "Column " << i << " should be numeric or string."; } throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); } From 8b6b6c8035a2844d50ca9fd79217af2ec0375fff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 02:53:36 +0300 Subject: [PATCH 52/79] Updated test --- .../0_stateless/00843_optimize_predicate_and_rename_table.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql index 324f9e7c5e0..fb70e561d35 100644 --- a/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql +++ b/dbms/tests/queries/0_stateless/00843_optimize_predicate_and_rename_table.sql @@ -1,3 +1,5 @@ +SET enable_optimize_predicate_expression = 1; + DROP TABLE IF EXISTS test.test1; DROP TABLE IF EXISTS test.test2; DROP TABLE IF EXISTS test.view; From 2695a4614d35ffb5f39e05bbbb9be5a0027c3c45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 03:11:49 +0300 Subject: [PATCH 53/79] Added a test #3170 --- .../0_stateless/00844_join_lightee2.reference | 1 + .../0_stateless/00844_join_lightee2.sql | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00844_join_lightee2.reference create mode 100644 dbms/tests/queries/0_stateless/00844_join_lightee2.sql diff --git a/dbms/tests/queries/0_stateless/00844_join_lightee2.reference b/dbms/tests/queries/0_stateless/00844_join_lightee2.reference new file mode 100644 index 00000000000..27f02a9914b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00844_join_lightee2.reference @@ -0,0 +1 @@ +1 name1 diff --git a/dbms/tests/queries/0_stateless/00844_join_lightee2.sql b/dbms/tests/queries/0_stateless/00844_join_lightee2.sql new file mode 100644 index 00000000000..4cdb317f1d4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00844_join_lightee2.sql @@ -0,0 +1,23 @@ +USE test; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE IF NOT EXISTS t1 ( +f1 UInt32, +f2 String +) ENGINE = MergeTree ORDER BY (f1); + +CREATE TABLE IF NOT EXISTS t2 ( +f1 String, +f3 String +) ENGINE = MergeTree ORDER BY (f1); + +insert into t1 values(1,'1'); +insert into t2 values('1','name1'); + +select t1.f1,t2.f3 from t1 all inner join t2 on t1.f2 = t2.f1 +where t2.f1 = '1'; + +DROP TABLE t1; +DROP TABLE t2; From c5966b4fbc049ec0b486b13f4e08bba61428b8e7 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Wed, 13 Feb 2019 13:18:09 +0300 Subject: [PATCH 54/79] Doc fix: note about CLEAR COLUMN .. PARTITION query (#4375) --- docs/en/query_language/alter.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 558f1c81875..e048d803f83 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -39,6 +39,9 @@ CLEAR COLUMN name IN PARTITION partition_name Clears all data in a column in a specified partition. +!!! info + Use the `CLEAR COLUMN .. IN PARTITION` query just with inactive partitions. Otherwise, it can cause the replication queue to freeze processing. 
+ ``` sql MODIFY COLUMN name [type] [default_expr] ``` From 3b09a816b303c930e618a7cf5dfdce1a58685349 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Feb 2019 15:05:34 +0300 Subject: [PATCH 55/79] Add options forwarding for binary builder docker image --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 8927e79b01f..4babbc01066 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -28,4 +28,4 @@ RUN apt-get update -y \ ninja-build \ git -CMD mkdir -p build/build_result && cd build/build_result && cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER && ninja && mv ./dbms/programs/clickhouse* /output +CMD mkdir -p build/build_result && cd build/build_result && cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS && ninja && mv ./dbms/programs/clickhouse* /output From e2d4dae5e171a9be18e6532f93ff0fc891d7248a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 15:08:51 +0300 Subject: [PATCH 56/79] Fixed race condition in removal of data parts #3531 --- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 77d02c8809f..100639a999c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -334,8 +334,20 @@ void MergeTreeDataPart::remove() const if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. This is bug.", ErrorCodes::LOGICAL_ERROR); + /** Atomic directory removal: + * - rename directory to temporary name; + * - remove it recursive. + * + * For temporary name we use "delete_tmp_" prefix. + * + * NOTE: We cannot use "tmp_delete_" prefix, because there is a second thread, + * that calls "clearOldTemporaryDirectories" and removes all directories, that begin with "tmp_" and are old enough. + * But when we removing data part, it can be old enough. And rename doesn't change mtime. + * And a race condition can happen that will lead to "File not found" error here. 
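The rename-before-remove pattern explained in the comment above is easy to state in isolation. Below is a minimal sketch using std::filesystem rather than the Poco API the code actually uses; the function and parameter names are illustrative, not part of MergeTreeDataPart.

``` cpp
#include <filesystem>
#include <string>

namespace fs = std::filesystem;

/// Remove a data part directory so that a concurrent cleaner of "tmp_*"
/// directories never races with us: first hide the directory under a
/// prefix the cleaner ignores, then delete it recursively.
void removePartDirectory(const fs::path & storage_path, const std::string & part_name)
{
    const fs::path from = storage_path / part_name;
    const fs::path to = storage_path / ("delete_tmp_" + part_name);

    /// rename() on the same filesystem is atomic, so no other thread ever
    /// sees a half-deleted directory under the original name.
    fs::rename(from, to);

    /// If the process dies between the two steps, startup only has to
    /// sweep leftover "delete_tmp_*" directories.
    fs::remove_all(to);
}
```

The choice of prefix is the whole point: anything matching "tmp_*" would be eligible for the unrelated temporary-directory cleaner, while "delete_tmp_*" is reserved for this code path.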
+ */ + String from = storage.full_path + relative_path; - String to = storage.full_path + "tmp_delete_" + name; + String to = storage.full_path + "delete_tmp_" + name; Poco::File from_dir{from}; Poco::File to_dir{to}; From a0b91077e783ee59c7689d60d9cda9c48b16066f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 14:50:41 +0300 Subject: [PATCH 57/79] Fixed -Wshadow-field-in-constructor (clang 8 warning) --- dbms/src/AggregateFunctions/AggregateFunctionAvg.h | 8 ++++---- dbms/src/AggregateFunctions/AggregateFunctionCount.h | 2 +- .../AggregateFunctions/AggregateFunctionEntropy.h | 6 +++--- .../AggregateFunctions/AggregateFunctionQuantile.h | 10 +++++----- .../AggregateFunctionStatisticsSimple.h | 8 ++++---- dbms/src/AggregateFunctions/AggregateFunctionSum.h | 8 ++++---- .../src/AggregateFunctions/AggregateFunctionSumMap.h | 12 ++++++------ dbms/src/AggregateFunctions/AggregateFunctionTopK.h | 4 ++-- dbms/src/AggregateFunctions/AggregateFunctionUniq.h | 4 ++-- .../AggregateFunctionUniqCombined.h | 4 ++-- .../AggregateFunctions/AggregateFunctionUniqUpTo.h | 4 ++-- 11 files changed, 35 insertions(+), 35 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h index d34420efe28..185dbc38c51 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionAvg.h @@ -49,14 +49,14 @@ public: using ColVecResult = std::conditional_t, ColumnDecimal, ColumnVector>; /// ctor for native types - AggregateFunctionAvg(const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionAvg(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(0) {} /// ctor for Decimals - AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionAvg(const IDataType & data_type, const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(getDecimalScale(data_type)) {} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionCount.h b/dbms/src/AggregateFunctions/AggregateFunctionCount.h index 82958a95fd2..e0371a78644 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionCount.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionCount.h @@ -28,7 +28,7 @@ namespace ErrorCodes class AggregateFunctionCount final : public IAggregateFunctionDataHelper { public: - AggregateFunctionCount(const DataTypes & argument_types) : IAggregateFunctionDataHelper(argument_types, {}) {} + AggregateFunctionCount(const DataTypes & argument_types_) : IAggregateFunctionDataHelper(argument_types_, {}) {} String getName() const override { return "count"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h index 91ec6d4d5a6..3041f1781aa 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -97,9 +97,9 @@ private: size_t num_args; public: - AggregateFunctionEntropy(const DataTypes & argument_types) - : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types, {}) - , num_args(argument_types.size()) + AggregateFunctionEntropy(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper, AggregateFunctionEntropy>(argument_types_, {}) + , num_args(argument_types_.size()) { } diff --git 
a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h index 399b7f993d0..f917fdc9742 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -175,16 +175,16 @@ public: const char * getHeaderFilePath() const override { return __FILE__; } - static void assertSecondArg(const DataTypes & argument_types) + static void assertSecondArg(const DataTypes & types) { if constexpr (has_second_arg) { - assertBinary(Name::name, argument_types); - if (!isUnsignedInteger(argument_types[1])) - throw Exception("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + argument_types[1]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + assertBinary(Name::name, types); + if (!isUnsignedInteger(types[1])) + throw Exception("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + types[1]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } else - assertUnary(Name::name, argument_types); + assertUnary(Name::name, types); } }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index 4ab6a4d51ed..3a4afd2c251 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -288,13 +288,13 @@ public: using ResultType = typename StatFunc::ResultType; using ColVecResult = ColumnVector; - AggregateFunctionVarianceSimple(const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionVarianceSimple(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , src_scale(0) {} - AggregateFunctionVarianceSimple(const IDataType & data_type, const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionVarianceSimple(const IDataType & data_type, const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , src_scale(getDecimalScale(data_type)) {} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSum.h b/dbms/src/AggregateFunctions/AggregateFunctionSum.h index 1860088cd93..4aead37e146 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSum.h @@ -102,13 +102,13 @@ public: String getName() const override { return "sum"; } - AggregateFunctionSum(const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionSum(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(0) {} - AggregateFunctionSum(const IDataType & data_type, const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) + AggregateFunctionSum(const IDataType & data_type, const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) , scale(getDecimalScale(data_type)) {} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h index ef6cae9babc..6837379f98f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -63,8 +63,8 @@ private: public: AggregateFunctionSumMapBase( const DataTypePtr & keys_type, const DataTypes & values_types, - const 
DataTypes & argument_types, const Array & params) - : IAggregateFunctionDataHelper>, Derived>(argument_types, params) + const DataTypes & argument_types_, const Array & params_) + : IAggregateFunctionDataHelper>, Derived>(argument_types_, params_) , keys_type(keys_type), values_types(values_types) {} String getName() const override { return "sumMap"; } @@ -274,8 +274,8 @@ private: using Base = AggregateFunctionSumMapBase; public: - AggregateFunctionSumMap(const DataTypePtr & keys_type, DataTypes & values_types, const DataTypes & argument_types) - : Base{keys_type, values_types, argument_types, {}} + AggregateFunctionSumMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) + : Base{keys_type_, values_types_, argument_types_, {}} {} String getName() const override { return "sumMap"; } @@ -296,8 +296,8 @@ private: public: AggregateFunctionSumMapFiltered( const DataTypePtr & keys_type, const DataTypes & values_types, const Array & keys_to_keep_, - const DataTypes & argument_types, const Array & params) - : Base{keys_type, values_types, argument_types, params} + const DataTypes & argument_types_, const Array & params_) + : Base{keys_type, values_types, argument_types_, params_} { keys_to_keep.reserve(keys_to_keep_.size()); for (const Field & f : keys_to_keep_) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h index 340b5f14e5a..76bca7dae86 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionTopK.h @@ -48,8 +48,8 @@ protected: UInt64 reserved; public: - AggregateFunctionTopK(UInt64 threshold, const DataTypes & argument_types, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types, params) + AggregateFunctionTopK(UInt64 threshold, const DataTypes & argument_types_, const Array & params) + : IAggregateFunctionDataHelper, AggregateFunctionTopK>(argument_types_, params) , threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {} String getName() const override { return is_weighted ? 
"topKWeighted" : "topK"; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index aea227a5d69..62eb1db8115 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -209,8 +209,8 @@ template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { public: - AggregateFunctionUniq(const DataTypes & argument_types) - : IAggregateFunctionDataHelper>(argument_types, {}) {} + AggregateFunctionUniq(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>(argument_types_, {}) {} String getName() const override { return Data::getName(); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 3b7aee95186..b82b1f2c198 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -114,8 +114,8 @@ class AggregateFunctionUniqCombined final : public IAggregateFunctionDataHelper, AggregateFunctionUniqCombined> { public: - AggregateFunctionUniqCombined(const DataTypes & argument_types, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types, params) {} + AggregateFunctionUniqCombined(const DataTypes & argument_types_, const Array & params_) + : IAggregateFunctionDataHelper, AggregateFunctionUniqCombined>(argument_types_, params_) {} String getName() const override { diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 477a729894d..de9ca69c17f 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -136,8 +136,8 @@ private: UInt8 threshold; public: - AggregateFunctionUniqUpTo(UInt8 threshold, const DataTypes & argument_types, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types, params) + AggregateFunctionUniqUpTo(UInt8 threshold, const DataTypes & argument_types_, const Array & params_) + : IAggregateFunctionDataHelper, AggregateFunctionUniqUpTo>(argument_types_, params_) , threshold(threshold) { } From af4c4dc1e5ace8d3ec4d330af0bc3ac884bdbac6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Feb 2019 14:57:47 +0300 Subject: [PATCH 58/79] Fix old clients insert query processing with embedded data --- dbms/programs/server/TCPHandler.cpp | 3 ++- dbms/src/Core/Defines.h | 1 + dbms/src/Interpreters/executeQuery.cpp | 5 +++-- dbms/src/Interpreters/executeQuery.h | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 51f331fab74..3f2475ff142 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -185,8 +185,9 @@ void TCPHandler::runImpl() state.maybe_compressed_in.reset(); /// For more accurate accounting by MemoryTracker. 
}); + bool may_have_embedded_data = client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA; /// Processing Query - state.io = executeQuery(state.query, query_context, false, state.stage); + state.io = executeQuery(state.query, query_context, false, state.stage, may_have_embedded_data); if (state.io.out) state.need_receive_data_for_insert = true; diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 2333fad774f..ad20cca8458 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -47,6 +47,7 @@ #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406 +#define DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA 54415 /// Minimum revision with exactly the same set of aggregation methods and rules to select them. /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index d04b616efd5..4a7defb5b63 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -435,10 +435,11 @@ BlockIO executeQuery( const String & query, Context & context, bool internal, - QueryProcessingStage::Enum stage) + QueryProcessingStage::Enum stage, + bool may_have_embedded_data) { BlockIO streams; - std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage, false); + std::tie(std::ignore, streams) = executeQueryImpl(query.data(), query.data() + query.size(), context, internal, stage, !may_have_embedded_data); return streams; } diff --git a/dbms/src/Interpreters/executeQuery.h b/dbms/src/Interpreters/executeQuery.h index 1d1fbae5daa..d56e9afafa4 100644 --- a/dbms/src/Interpreters/executeQuery.h +++ b/dbms/src/Interpreters/executeQuery.h @@ -37,7 +37,8 @@ BlockIO executeQuery( const String & query, /// Query text without INSERT data. The latter must be written to BlockIO::out. Context & context, /// DB, tables, data types, storage engines, functions, aggregate functions... bool internal = false, /// If true, this query is caused by another query and thus needn't be registered in the ProcessList. - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// To which stage the query must be executed. + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, /// To which stage the query must be executed. 
+ bool may_have_embedded_data = false /// If insert query may have embedded data ); } From 15d0adaf5a852df3635fda42f05c12961474df9f Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 13 Feb 2019 16:05:58 +0300 Subject: [PATCH 59/79] add info about replicated_can_become_leader to logs and system.replicas [#CLICKHOUSE-4309] --- .../Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp | 2 ++ dbms/src/Storages/StorageReplicatedMergeTree.cpp | 1 + dbms/src/Storages/StorageReplicatedMergeTree.h | 1 + dbms/src/Storages/System/StorageSystemReplicas.cpp | 2 ++ 4 files changed, 6 insertions(+) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 6cb98cff159..5b58e6ad56b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -183,6 +183,8 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() if (storage.data.settings.replicated_can_become_leader) storage.enterLeaderElection(); + else + LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); /// Anything above can throw a KeeperException if something is wrong with ZK. /// Anything below should not throw exceptions. diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index a1eaee63a72..fd851bba27c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3904,6 +3904,7 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) auto zookeeper = tryGetZooKeeper(); res.is_leader = is_leader; + res.can_become_leader = data.settings.replicated_can_become_leader; res.is_readonly = is_readonly; res.is_session_expired = !zookeeper || zookeeper->expired(); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 69a611288a7..42b5f0b5f66 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -153,6 +153,7 @@ public: struct Status { bool is_leader; + bool can_become_leader; bool is_readonly; bool is_session_expired; ReplicatedMergeTreeQueue::Status queue; diff --git a/dbms/src/Storages/System/StorageSystemReplicas.cpp b/dbms/src/Storages/System/StorageSystemReplicas.cpp index 2c1bb8f9d31..6dd7d7081e7 100644 --- a/dbms/src/Storages/System/StorageSystemReplicas.cpp +++ b/dbms/src/Storages/System/StorageSystemReplicas.cpp @@ -22,6 +22,7 @@ StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) { "table", std::make_shared() }, { "engine", std::make_shared() }, { "is_leader", std::make_shared() }, + { "can_become_leader", std::make_shared() }, { "is_readonly", std::make_shared() }, { "is_session_expired", std::make_shared() }, { "future_parts", std::make_shared() }, @@ -137,6 +138,7 @@ BlockInputStreams StorageSystemReplicas::read( size_t col_num = 3; res_columns[col_num++]->insert(status.is_leader); + res_columns[col_num++]->insert(status.can_become_leader); res_columns[col_num++]->insert(status.is_readonly); res_columns[col_num++]->insert(status.is_session_expired); res_columns[col_num++]->insert(status.queue.future_parts); From ec99ce1ab569de97237ac8487fcbabac7262b1c5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Feb 2019 16:17:07 +0300 Subject: [PATCH 60/79] Auto version update to [19.3.2] [54415] --- 
dbms/cmake/version.cmake | 8 ++++---- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 5f3758b89a6..1ed78d248f3 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54415) set(VERSION_MAJOR 19) set(VERSION_MINOR 3) -set(VERSION_PATCH 1) -set(VERSION_GITHASH 48280074c4a9151ca010fb0a777efd82634460bd) -set(VERSION_DESCRIBE v19.3.1-testing) -set(VERSION_STRING 19.3.1) +set(VERSION_PATCH 2) +set(VERSION_GITHASH 15d0adaf5a852df3635fda42f05c12961474df9f) +set(VERSION_DESCRIBE v19.3.2-testing) +set(VERSION_STRING 19.3.2) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index 8d988cb3cac..a57633f973c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.3.1) unstable; urgency=low +clickhouse (19.3.2) unstable; urgency=low * Modified source code - -- Tue, 12 Feb 2019 21:42:14 +0300 + -- Wed, 13 Feb 2019 16:17:06 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 2196a342ecd..8323bd741ff 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.1 +ARG version=19.3.2 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 244c86f92a9..0fe819c9288 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.1 +ARG version=19.3.2 ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index e5cf7036165..e317d8fb89e 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.1 +ARG version=19.3.2 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From 62eb65774a71b4a728b6fb32e1a126e11a0db3ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 16:42:01 +0300 Subject: [PATCH 61/79] Added suppression for libunwind --- dbms/src/Core/Defines.h | 2 ++ libs/libdaemon/src/BaseDaemon.cpp | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index ad20cca8458..96c3fa2c57f 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -111,7 +111,9 @@ /// Example: multiplication of signed integers with possibility of overflow when both sides are from user input. #if defined(__clang__) #define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) + #define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. 
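A stand-alone illustration of the suppression added above: under clang the function attribute disables AddressSanitizer instrumentation for that single function, and on other compilers the macro expands to nothing. The macro spelling matches the patch; the example function itself is hypothetical.

``` cpp
#include <cstddef>

#if defined(__clang__)
    #define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
#else
    #define NO_SANITIZE_ADDRESS
#endif

/// ASan will not instrument the memory accesses in this function, which is
/// the intended effect when a third-party unwinder probes memory in a way
/// the sanitizer misinterprets as stack-use-after-scope.
size_t NO_SANITIZE_ADDRESS sumBytes(const unsigned char * data, size_t size)
{
    size_t sum = 0;
    for (size_t i = 0; i < size; ++i)
        sum += data[i];
    return sum;
}
```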
#define NO_SANITIZE_UNDEFINED + #define NO_SANITIZE_ADDRESS #endif diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 98ca00e6719..eabf2531a41 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -183,7 +183,21 @@ static void faultSignalHandler(int sig, siginfo_t * info, void * context) #if USE_UNWIND -size_t backtraceLibUnwind(void ** out_frames, size_t max_frames, ucontext_t & context) +/** We suppress the following ASan report. Also shown by Valgrind. +==124==ERROR: AddressSanitizer: stack-use-after-scope on address 0x7f054be57000 at pc 0x0000068b0649 bp 0x7f060eeac590 sp 0x7f060eeabd40 +READ of size 1 at 0x7f054be57000 thread T3 + #0 0x68b0648 in write (/usr/bin/clickhouse+0x68b0648) + #1 0x717da02 in write_validate /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/x86_64/Ginit.c:110:13 + #2 0x717da02 in mincore_validate /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/x86_64/Ginit.c:146 + #3 0x717dec1 in validate_mem /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/x86_64/Ginit.c:206:7 + #4 0x717dec1 in access_mem /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/x86_64/Ginit.c:240 + #5 0x71881a9 in dwarf_get /build/obj-x86_64-linux-gnu/../contrib/libunwind/include/tdep-x86_64/libunwind_i.h:168:12 + #6 0x71881a9 in apply_reg_state /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/dwarf/Gparser.c:872 + #7 0x718705c in _ULx86_64_dwarf_step /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/dwarf/Gparser.c:953:10 + #8 0x718f155 in _ULx86_64_step /build/obj-x86_64-linux-gnu/../contrib/libunwind/src/x86_64/Gstep.c:71:9 + #9 0x7162671 in backtraceLibUnwind(void**, unsigned long, ucontext_t&) /build/obj-x86_64-linux-gnu/../libs/libdaemon/src/BaseDaemon.cpp:202:14 + */ +size_t NO_SANITIZE_ADDRESS backtraceLibUnwind(void ** out_frames, size_t max_frames, ucontext_t & context) { unw_cursor_t cursor; From 49bb1f9ca3ffe2842da0ed6a226273db4dc5d7b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Feb 2019 17:17:53 +0300 Subject: [PATCH 62/79] Updated contributors --- .../Storages/System/StorageSystemContributors.generated.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp index c1dc375a4bd..c578c646f3a 100644 --- a/dbms/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/dbms/src/Storages/System/StorageSystemContributors.generated.cpp @@ -40,6 +40,7 @@ const char * auto_contributors[] { "Anton Popov", "Anton Tihonov", "Anton Tikhonov", + "Anton Zhabolenko", "Arsen Hakobyan", "Artem Andreenko", "Artem Zuikov", @@ -149,6 +150,9 @@ const char * auto_contributors[] { "Michael Razuvaev", "Michal Lisowski", "Mihail Fandyushin", + "Mike F", + "Mikhail", + "Mikhail Fandyushin", "Mikhail Filimonov", "Mikhail Salosin", "Mikhail Surin", @@ -283,6 +287,7 @@ const char * auto_contributors[] { "maiha", "mf5137", "mfridental", + "miha-g", "morty", "moscas", "nicelulu", From 9b8fd5f8a2a8078fa1e0fd44ab1a7b258d5b9a26 Mon Sep 17 00:00:00 2001 From: Alex Zatelepin Date: Wed, 13 Feb 2019 18:32:04 +0300 Subject: [PATCH 63/79] add brotli to clickhouse-test deps --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 2be5e609315..cf4e617fcde 100644 --- a/debian/control +++ b/debian/control @@ -60,7 +60,7 @@ Description: debugging symbols for clickhouse-common-static Package: clickhouse-test Priority: 
optional Architecture: all -Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-client, bash, expect, python, python-lxml, python-termcolor, python-requests, curl, perl, sudo, openssl, netcat-openbsd, telnet +Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-client, bash, expect, python, python-lxml, python-termcolor, python-requests, curl, perl, sudo, openssl, netcat-openbsd, telnet, brotli Description: ClickHouse tests From f5560660beff430896d7a23d70a7ea8a06416ea9 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 13 Feb 2019 17:47:24 +0300 Subject: [PATCH 64/79] Fix brotli (unbundled, missing, ...) --- cmake/find_brotli.cmake | 13 +++++++++---- dbms/src/Common/config.h.in | 1 + dbms/src/IO/BrotliReadBuffer.cpp | 4 ++++ .../StorageSystemBuildOptions.generated.cpp.in | 1 + utils/build/build_debian_unbundled.sh | 2 +- 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/cmake/find_brotli.cmake b/cmake/find_brotli.cmake index 34f5845a346..6e93e88df9c 100644 --- a/cmake/find_brotli.cmake +++ b/cmake/find_brotli.cmake @@ -8,10 +8,15 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode. set (MISSING_INTERNAL_BROTLI_LIBRARY 1) endif () -if (NOT USE_INTERNAL_BROTLI_LIBRARY) - find_library (BROTLI_LIBRARY brotli) - find_path (BROTLI_INCLUDE_DIR NAMES decode.h encode.h port.h types.h PATHS ${BROTLI_INCLUDE_PATHS}) -endif () +if(NOT USE_INTERNAL_BROTLI_LIBRARY) + find_library(BROTLI_LIBRARY_COMMON brotlicommon) + find_library(BROTLI_LIBRARY_DEC brotlidec) + find_library(BROTLI_LIBRARY_ENC brotlienc) + find_path(BROTLI_INCLUDE_DIR NAMES brotli/decode.h brotli/encode.h brotli/port.h brotli/types.h PATHS ${BROTLI_INCLUDE_PATHS}) + if(BROTLI_LIBRARY_DEC AND BROTLI_LIBRARY_ENC AND BROTLI_LIBRARY_COMMON) + set(BROTLI_LIBRARY ${BROTLI_LIBRARY_DEC} ${BROTLI_LIBRARY_ENC} ${BROTLI_LIBRARY_COMMON}) + endif() +endif() if (BROTLI_LIBRARY AND BROTLI_INCLUDE_DIR) set (USE_BROTLI 1) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index aa57582f43c..2f4f10624d4 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -20,6 +20,7 @@ #cmakedefine01 USE_PROTOBUF #cmakedefine01 USE_CPUID #cmakedefine01 USE_CPUINFO +#cmakedefine01 USE_BROTLI #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/IO/BrotliReadBuffer.cpp b/dbms/src/IO/BrotliReadBuffer.cpp index 9fc033c41cc..6d81c6e40c6 100644 --- a/dbms/src/IO/BrotliReadBuffer.cpp +++ b/dbms/src/IO/BrotliReadBuffer.cpp @@ -1,3 +1,6 @@ +#include +#if USE_BROTLI + #include "BrotliReadBuffer.h" #include @@ -86,3 +89,4 @@ bool BrotliReadBuffer::nextImpl() } } +#endif diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index be7d93c9fc5..a8e796f20fc 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -49,6 +49,7 @@ const char * auto_config_build[] "USE_POCO_NETSSL", "@USE_POCO_NETSSL@", "USE_BASE64", "@USE_BASE64@", "USE_PROTOBUF", "@USE_PROTOBUF@", + "USE_BROTLI", "@USE_BROTLI@", nullptr, nullptr }; diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index aeab5eef6b9..8af0e4b3c06 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` 
\ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libxml2-dev libgsasl7-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libxml2-dev libgsasl7-dev libbrotli-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT From cfab694978ec801d8d401e32fd5ca13aaeca25f9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Feb 2019 18:36:20 +0300 Subject: [PATCH 65/79] Auto version update to [19.3.3] [54415] --- dbms/cmake/version.cmake | 8 ++++---- debian/changelog | 4 ++-- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 1ed78d248f3..4386a343d2f 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -2,10 +2,10 @@ set(VERSION_REVISION 54415) set(VERSION_MAJOR 19) set(VERSION_MINOR 3) -set(VERSION_PATCH 2) -set(VERSION_GITHASH 15d0adaf5a852df3635fda42f05c12961474df9f) -set(VERSION_DESCRIBE v19.3.2-testing) -set(VERSION_STRING 19.3.2) +set(VERSION_PATCH 3) +set(VERSION_GITHASH f5560660beff430896d7a23d70a7ea8a06416ea9) +set(VERSION_DESCRIBE v19.3.3-testing) +set(VERSION_STRING 19.3.3) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/debian/changelog b/debian/changelog index a57633f973c..89a82c1ad3d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (19.3.2) unstable; urgency=low +clickhouse (19.3.3) unstable; urgency=low * Modified source code - -- Wed, 13 Feb 2019 16:17:06 +0300 + -- Wed, 13 Feb 2019 18:36:20 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 8323bd741ff..ff256e5e3e7 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.2 +ARG version=19.3.3 RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 0fe819c9288..fc0c6a53c58 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.2 +ARG version=19.3.3 ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index e317d8fb89e..83e518e22f9 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=19.3.2 +ARG version=19.3.3 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ From cf40a1538b2c80776e3f2bdc6c75f239689bc890 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Feb 2019 19:49:13 +0300 Subject: [PATCH 66/79] Fix 
size() and empty() for AggregationDataWithNullKey. #4223 --- dbms/src/Interpreters/Aggregator.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index c4ffb64a15e..87febbc77e8 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -98,6 +98,18 @@ struct AggregationDataWithNullKey : public Base AggregateDataPtr & getNullKeyData() { return null_key_data; } bool hasNullKeyData() const { return has_null_key_data; } const AggregateDataPtr & getNullKeyData() const { return null_key_data; } + size_t size() const { return Base::size() + (has_null_key_data ? 1 : 0); } + bool empty() const { return Base::empty() && !has_null_key_data; } + void clear() + { + Base::clear(); + has_null_key_data = false; + } + void clearAndShrink() + { + Base::clearAndShrink(); + has_null_key_data = false; + } private: bool has_null_key_data = false; From 8e2cc9841698df86d940d909e55f0504ce662080 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Feb 2019 19:56:06 +0300 Subject: [PATCH 67/79] Added test for LowCardinality with rollup and cube. --- .../00906_low_cardinality_rollup.reference | 18 ++++++++++++++++++ .../00906_low_cardinality_rollup.sql | 14 ++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.reference create mode 100644 dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql diff --git a/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.reference b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.reference new file mode 100644 index 00000000000..3e287311126 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.reference @@ -0,0 +1,18 @@ +c d 1 +a b 1 +c \N 1 +a \N 1 +\N \N 2 +c 1 +a 1 +\N 2 +c d 1 +a b 1 +c \N 1 +a \N 1 +\N b 1 +\N d 1 +\N \N 2 +c 1 +a 1 +\N 2 diff --git a/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql new file mode 100644 index 00000000000..3a9bf205d68 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql @@ -0,0 +1,14 @@ +SET allow_experimental_low_cardinality_type = 1; + +DROP TABLE if exists test.lc; +CREATE TABLE test.lc (a LowCardinality(Nullable(String)), b LowCardinality(Nullable(String))) ENGINE = MergeTree order by tuple(); +INSERT INTO test.lc VALUES ('a', 'b'); +INSERT INTO test.lc VALUES ('c', 'd'); + +SELECT a, b, count(a) FROM test.lc GROUP BY a, b WITH ROLLUP; +SELECT a, count(a) FROM test.lc GROUP BY a WITH ROLLUP; + +SELECT a, b, count(a) FROM test.lc GROUP BY a, b WITH CUBE; +SELECT a, count(a) FROM test.lc GROUP BY a WITH CUBE; + +DROP TABLE if exists test.lc; From 35703c24847a5a36bef0130f81a0c4ca448d6b57 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Wed, 13 Feb 2019 20:01:38 +0300 Subject: [PATCH 68/79] Doc fix: removing note temporarily (#4383) --- docs/en/query_language/alter.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index e048d803f83..558f1c81875 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -39,9 +39,6 @@ CLEAR COLUMN name IN PARTITION partition_name Clears all data in a column in a specified partition. -!!! info - Use the `CLEAR COLUMN .. IN PARTITION` query just with inactive partitions. Otherwise, it can cause the replication queue to freeze processing. 
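The Aggregator.h change earlier in this patch ([PATCH 66]) boils down to one idea: when the null key is stored outside the underlying hash table, size(), empty() and clear() must account for that extra slot. A reduced sketch over std::unordered_map, not the real ClickHouse hash table API:

``` cpp
#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>

/// Map that keeps the "null key" value in a side slot. Without the size()
/// and empty() overrides the side slot would be invisible to callers,
/// which is the kind of inconsistency the patch fixes.
template <typename Mapped>
class MapWithNullKey
{
public:
    void insert(const std::string & key, Mapped value) { data[key] = std::move(value); }

    void insertNullKey(Mapped value)
    {
        null_key_value = std::move(value);
        has_null_key = true;
    }

    size_t size() const { return data.size() + (has_null_key ? 1 : 0); }
    bool empty() const { return data.empty() && !has_null_key; }

    void clear()
    {
        data.clear();
        has_null_key = false;
    }

private:
    std::unordered_map<std::string, Mapped> data;
    Mapped null_key_value{};
    bool has_null_key = false;
};
```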
- ``` sql MODIFY COLUMN name [type] [default_expr] ``` From 42baf1652fc1aba248be078f261870acef2bd3b8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Feb 2019 20:08:20 +0300 Subject: [PATCH 69/79] Added test for LowCardinality with rollup and cube. --- dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql index 3a9bf205d68..591a0681357 100644 --- a/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql +++ b/dbms/tests/queries/0_stateless/00906_low_cardinality_rollup.sql @@ -1,5 +1,3 @@ -SET allow_experimental_low_cardinality_type = 1; - DROP TABLE if exists test.lc; CREATE TABLE test.lc (a LowCardinality(Nullable(String)), b LowCardinality(Nullable(String))) ENGINE = MergeTree order by tuple(); INSERT INTO test.lc VALUES ('a', 'b'); From be64f57f7bed40a7d21c6e443dbeb10f0be02b49 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 11 Feb 2019 10:57:51 +0300 Subject: [PATCH 70/79] Test runner script and corresponding Dockerfile and docker-compose. --- docker/server/local.Dockerfile | 38 +++++++++++++++++++ ...lickhouse-statelest-test-runner.Dockerfile | 12 ++++++ docker/test/test_runner.sh | 31 +++++++++++++++ docker/test/test_runner_docker_compose.yaml | 30 +++++++++++++++ 4 files changed, 111 insertions(+) create mode 100644 docker/server/local.Dockerfile create mode 100644 docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile create mode 100755 docker/test/test_runner.sh create mode 100644 docker/test/test_runner_docker_compose.yaml diff --git a/docker/server/local.Dockerfile b/docker/server/local.Dockerfile new file mode 100644 index 00000000000..33d7e11f118 --- /dev/null +++ b/docker/server/local.Dockerfile @@ -0,0 +1,38 @@ +FROM ubuntu:18.04 + +ARG gosu_ver=1.10 +ARG CLICKHOUSE_PACKAGES_DIR + +COPY ${CLICKHOUSE_PACKAGES_DIR}/clickhouse-*.deb /packages/ + +# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically. 
+RUN apt update; \ + DEBIAN_FRONTEND=noninteractive \ + apt install -y \ + /packages/clickhouse-common-static_*.deb \ + /packages/clickhouse-server_*.deb \ + locales ;\ + rm -rf /packages + +ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +RUN mkdir /docker-entrypoint-initdb.d + +COPY server/docker_related_config.xml /etc/clickhouse-server/config.d/ +COPY server/entrypoint.sh /entrypoint.sh + +RUN chmod +x \ + /entrypoint.sh \ + /bin/gosu + +EXPOSE 9000 8123 9009 +VOLUME /var/lib/clickhouse + +ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile b/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile new file mode 100644 index 00000000000..8aecb7119cc --- /dev/null +++ b/docker/test/stateless/clickhouse-statelest-test-runner.Dockerfile @@ -0,0 +1,12 @@ +FROM ubuntu:18.10 + +ARG CLICKHOUSE_PACKAGES_DIR +COPY ${CLICKHOUSE_PACKAGES_DIR}/clickhouse-*.deb /packages/ + +RUN apt-get update ;\ + DEBIAN_FRONTEND=noninteractive \ + apt install -y /packages/clickhouse-common-static_*.deb \ + /packages/clickhouse-client_*.deb \ + /packages/clickhouse-test_*.deb \ + wait-for-it; \ + rm -rf /packages diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh new file mode 100755 index 00000000000..afa5c95720b --- /dev/null +++ b/docker/test/test_runner.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +set -e + +# Run tests in docker +# OR +# Build containers from deb packages, copying the tests from the source directory + +readonly CLICKHOUSE_DOCKER_DIR="$(realpath ${1})" +readonly CLICKHOUSE_PACKAGES_DIR="${2}" +CLICKHOUSE_SERVER_IMAGE="${3}" + +# Build test runner image +docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ + -t clickhouse-statelest-test-runner:local \ + --build-arg CLICKHOUSE_PACKAGES_DIR="${CLICKHOUSE_PACKAGES_DIR}" \ + "${CLICKHOUSE_DOCKER_DIR}" + +# Build server image (optional) from local packages +if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then + CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse_server:local" + + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ + -t "${CLICKHOUSE_SERVER_IMAGE}" \ + --build-arg CLICKHOUSE_PACKAGES_DIR=${CLICKHOUSE_PACKAGES_DIR} \ + "${CLICKHOUSE_DOCKER_DIR}" +fi + +CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" run test-runner \ No newline at end of file diff --git a/docker/test/test_runner_docker_compose.yaml b/docker/test/test_runner_docker_compose.yaml new file mode 100644 index 00000000000..281442f26a4 --- /dev/null +++ b/docker/test/test_runner_docker_compose.yaml @@ -0,0 +1,30 @@ +version: "2" + +services: + clickhouse-server: + image: ${CLICKHOUSE_SERVER_IMAGE} + expose: + - "8123" + - "9000" + - "9009" + restart: "no" + + test-runner: + image: yandex/clickhouse-statelest-test-runner:local + + restart: "no" + depends_on: + - clickhouse-server + environment: + # these are used by clickhouse-test to point clickhouse-client to the right server + - CLICKHOUSE_HOST=clickhouse-server + - CLICKHOUSE_PORT=8123 + + entrypoint: + - wait-for-it + - clickhouse-server:8123 + - -- + - clickhouse-test + # - -c + # - `which clickhouse-client` + - ${CLICKHOUSE_TEST_ARGS} From ca1484ae95dffa7a5eab91f232e266f3e13c8a5d Mon Sep 17 00:00:00 2001 From: 
Nikita Vasilev Date: Wed, 13 Feb 2019 22:26:24 +0300 Subject: [PATCH 71/79] set exactly one arg --- .../MergeTree/MergeTreeSetSkippingIndex.cpp | 25 ++++++++----------- .../queries/0_stateless/00838_unique_index.sh | 6 ++--- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp index bed74d0d640..3c3a414d000 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp @@ -31,12 +31,12 @@ void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception( - "Attempt to write empty unique index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); + "Attempt to write empty set index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); const auto & columns = set->getSetElements(); const auto & size_type = DataTypePtr(std::make_shared()); - if (index.max_rows && size() > index.max_rows) + if (size() > index.max_rows) { size_type->serializeBinary(0, ostr); return; @@ -87,7 +87,7 @@ void MergeTreeSetIndexGranule::update(const Block & new_block, size_t * pos, UIn size_t rows_read = std::min(limit, new_block.rows() - *pos); - if (index.max_rows && size() > index.max_rows) + if (size() > index.max_rows) { *pos += rows_read; return; @@ -112,7 +112,7 @@ void MergeTreeSetIndexGranule::update(const Block & new_block, size_t * pos, UIn Block MergeTreeSetIndexGranule::getElementsBlock() const { - if (index.max_rows && size() > index.max_rows) + if (size() > index.max_rows) return index.header; return index.header.cloneWithColumns(set->getSetElements()); } @@ -169,12 +169,12 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) auto granule = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception( - "Unique index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); if (useless) return true; - if (index.max_rows && granule->size() > index.max_rows) + if (granule->size() > index.max_rows) return true; Block result = granule->getElementsBlock(); @@ -363,14 +363,11 @@ std::unique_ptr setIndexCreator( throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY); size_t max_rows = 0; - if (node->type->arguments) - { - if (node->type->arguments->children.size() > 1) - throw Exception("Unique index cannot have only 0 or 1 argument", ErrorCodes::INCORRECT_QUERY); - else if (node->type->arguments->children.size() == 1) - max_rows = typeid_cast( - *node->type->arguments->children[0]).value.get(); - } + if (!node->type->arguments || node->type->arguments->children.size() != 1) + throw Exception("Set index must have exactly one argument.", ErrorCodes::INCORRECT_QUERY); + else if (node->type->arguments->children.size() == 1) + max_rows = typeid_cast( + *node->type->arguments->children[0]).value.get(); ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); diff --git a/dbms/tests/queries/0_stateless/00838_unique_index.sh b/dbms/tests/queries/0_stateless/00838_unique_index.sh index dd4440bd5ce..f6bea4f083a 100755 --- a/dbms/tests/queries/0_stateless/00838_unique_index.sh +++ b/dbms/tests/queries/0_stateless/00838_unique_index.sh @@ -16,9 +16,9 @@ CREATE TABLE test.set_idx s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, - INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE set GRANULARITY 1, - 
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE set GRANULARITY 2, - INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE set GRANULARITY 3 + INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE set(2) GRANULARITY 1, + INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE set(4) GRANULARITY 2, + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE set(6) GRANULARITY 3 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 2;" From 04a62f3df4b00e978be45d6425af1db1d03191e0 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 13 Feb 2019 22:29:31 +0300 Subject: [PATCH 72/79] set args --- docs/en/operations/table_engines/mergetree.md | 6 ++---- docs/ru/operations/table_engines/mergetree.md | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 948d63ff7d8..7d86776d27b 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -252,7 +252,7 @@ CREATE TABLE table_name s String, ... INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE set GRANULARITY 4 + INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -269,12 +269,10 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of the data like the primary key. * `set(max_rows)` -Stores unique values of the specified expression (no more than `max_rows` rows), use them to check if the `WHERE` expression is not satisfiable on a block of the data. -If `max_rows=0`, then there are no limits for storing values. `set` without parameters is equal to `set(0)`. +Stores unique values of the specified expression (no more than `max_rows` rows), use them to check if the `WHERE` expression is not satisfiable on a block of the data. ```sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 -INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set GRANULARITY 4 INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 ``` diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 3c4f84d1c8c..258c6fc7ce1 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -243,7 +243,7 @@ CREATE TABLE table_name s String, ... INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s), i32) TYPE set GRANULARITY 4 + INDEX b (u64 * length(s), i32) TYPE set(1000) GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -261,7 +261,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 * `set(max_rows)` Хранит уникальные значения выражения на блоке в количестве не более `max_rows`, используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. -Если `max_rows=0`, то хранит значения выражения без ограничений. Если параметров не передано, то полагается `max_rows=0`. 
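The set(max_rows) behaviour being documented and enforced in this group of patches has a simple model: the index granule collects at most max_rows distinct values, and once that budget is exceeded it can no longer rule anything out, so it must answer "maybe" and let the block be read. A self-contained sketch of that contract; the class and method names are illustrative, not the MergeTreeSetSkippingIndex API.

``` cpp
#include <cstddef>
#include <set>

/// Minimal model of a set-type skipping index granule.
class BoundedValueSet
{
public:
    explicit BoundedValueSet(size_t max_rows_) : max_rows(max_rows_) {}

    void add(int value)
    {
        if (overflowed)
            return;
        values.insert(value);
        if (values.size() > max_rows)
            overflowed = true;
    }

    /// "May `column == value` hold somewhere in this block?"
    /// After overflow the set is no longer authoritative, so answer true.
    bool mayContain(int value) const
    {
        return overflowed || values.count(value) > 0;
    }

private:
    size_t max_rows;
    std::set<int> values;
    bool overflowed = false;
};
```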
+ Примеры From 5f40ae53e37e961858a86cb2139ab3b8579473ff Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 13 Feb 2019 22:33:10 +0300 Subject: [PATCH 73/79] fix --- docs/ru/operations/table_engines/mergetree.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 258c6fc7ce1..aca22f61563 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -263,7 +263,6 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Хранит уникальные значения выражения на блоке в количестве не более `max_rows`, используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. - Примеры ```sql INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4 From 55df6f0a1a7b5fcf02b7ef998ee07dda1b1575c3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 13 Feb 2019 22:33:58 +0300 Subject: [PATCH 74/79] fix docs --- docs/ru/operations/table_engines/mergetree.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index aca22f61563..22df2d45e64 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -266,7 +266,6 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Примеры ```sql INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4 -INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set GRANULARITY 4 INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 ``` From 46e8553b1fe4a0de555acee5fbb65fb1fdddea1d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Feb 2019 00:16:40 +0300 Subject: [PATCH 75/79] Config: added clarification --- dbms/programs/server/config.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index 108e64e3387..8a0c5be91de 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -1,4 +1,7 @@ + From f8c0f4697c11499ff791daf359cb770b1ec12502 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 14 Feb 2019 11:49:31 +0300 Subject: [PATCH 76/79] create test --- .../00907_set_index_max_rows.reference | 1 + .../0_stateless/00907_set_index_max_rows.sh | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference create mode 100755 dbms/tests/queries/0_stateless/00907_set_index_max_rows.sh diff --git a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference new file mode 100644 index 00000000000..a80322c42a2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference @@ -0,0 +1 @@ + "rows_read": 0, diff --git a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.sh b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.sh new file mode 100755 index 00000000000..ed2a732c74f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +.
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.set_idx;" + +$CLICKHOUSE_CLIENT -n --query=" +SET allow_experimental_data_skipping_indices = 1; +CREATE TABLE test.set_idx +( + u64 UInt64, + i32 Int32, + INDEX idx (i32) TYPE set(2) GRANULARITY 1 +) ENGINE = MergeTree() +ORDER BY u64 +SETTINGS index_granularity = 6;" + +$CLICKHOUSE_CLIENT --query=" +INSERT INTO test.set_idx +SELECT number, number FROM system.numbers LIMIT 100" + +# simple select +$CLICKHOUSE_CLIENT --query="SELECT * FROM test.set_idx WHERE i32 > 0 FORMAT JSON" | grep "rows_read" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE test.set_idx;" \ No newline at end of file From 60158d06ede348ca440863da46bb5c6ff64a2a71 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 14 Feb 2019 11:50:17 +0300 Subject: [PATCH 77/79] fix test --- .../queries/0_stateless/00907_set_index_max_rows.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference index a80322c42a2..4f09265b3b4 100644 --- a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference +++ b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference @@ -1 +1 @@ - "rows_read": 0, + "rows_read": 100, From 683314b69b668586f3993b1d48b980f3a2129bcb Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 14 Feb 2019 12:06:32 +0300 Subject: [PATCH 78/79] fix set --- dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp index 3c3a414d000..89d2b38e550 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSetSkippingIndex.cpp @@ -171,10 +171,7 @@ bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) throw Exception( "Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); - if (useless) - return true; - - if (granule->size() > index.max_rows) + if (useless || !granule->size() || granule->size() > index.max_rows) return true; Block result = granule->getElementsBlock(); From 64e0732b4abf7dddb22b22fc2451c935534683e6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 14 Feb 2019 12:13:04 +0300 Subject: [PATCH 79/79] fixed --- dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 7 ++----- .../queries/0_stateless/00907_set_index_max_rows.reference | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index f2bcdb4a1ff..2b60901f8df 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -39,13 +39,11 @@ void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr) { parallelogram.clear(); + Field min_val, max_val; for (size_t i = 0; i < index.columns.size(); ++i) { const DataTypePtr & type = index.data_types[i]; - - Field min_val; type->deserializeBinary(min_val, istr); - Field max_val; type->deserializeBinary(max_val, istr); parallelogram.emplace_back(min_val, true, max_val, true); @@ -61,11 +59,10 @@ void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, UInt64 li size_t rows_read = std::min(limit,
block.rows() - *pos); + Field field_min, field_max; for (size_t i = 0; i < index.columns.size(); ++i) { const auto & column = block.getByName(index.columns[i]).column; - - Field field_min, field_max; column->cut(*pos, rows_read)->getExtremes(field_min, field_max); if (parallelogram.size() <= i) diff --git a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference index 4f09265b3b4..3ee41d6fea1 100644 --- a/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference +++ b/dbms/tests/queries/0_stateless/00907_set_index_max_rows.reference @@ -1 +1 @@ - "rows_read": 100, + "rows_read": 100,
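Taken together, patches 71 through 79 change two user-visible behaviours: a `set` skipping index now requires exactly one argument, and a granule whose number of distinct values exceeds `max_rows` can never be skipped. A minimal sketch of how one might check both against a running server, written in the style of the 0_stateless tests above; the plain `clickhouse-client` invocation, the table name `test.set_idx_args`, and the grep patterns are illustrative assumptions and are not part of the patch series:

```bash
#!/usr/bin/env bash
# Illustrative check only (not part of the patch series). Assumes a local
# ClickHouse server reachable via clickhouse-client; test.set_idx_args is a
# hypothetical table name chosen to avoid clashing with the bundled tests.

CLIENT="clickhouse-client"

$CLIENT --query="DROP TABLE IF EXISTS test.set_idx_args;"

# Since PATCH 71, `TYPE set` with no argument is rejected with INCORRECT_QUERY;
# the grep below matches a fragment of the new error message.
$CLIENT --multiquery --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE test.set_idx_args
(
    u64 UInt64,
    i32 Int32,
    INDEX idx (i32) TYPE set GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY u64;" 2>&1 | grep -o "must have exactly one argument"

# With set(2) and six distinct i32 values per granule, the index stores nothing
# for any granule, so the filter cannot skip data and the whole table is read.
$CLIENT --multiquery --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE test.set_idx_args
(
    u64 UInt64,
    i32 Int32,
    INDEX idx (i32) TYPE set(2) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY u64
SETTINGS index_granularity = 6;"

$CLIENT --query="INSERT INTO test.set_idx_args SELECT number, number FROM system.numbers LIMIT 100"
$CLIENT --query="SELECT * FROM test.set_idx_args WHERE i32 > 0 FORMAT JSON" | grep "rows_read"

$CLIENT --query="DROP TABLE test.set_idx_args;"
```

If the behaviour matches the series, the first grep should print the fragment of the new INCORRECT_QUERY message and the second should report rows_read equal to 100, in line with the 00907_set_index_max_rows reference after patches 77 and 78.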