diff --git a/README.md b/README.md index dc253d4db2d..3d7d7441081 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,6 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 -* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index 2a1cb33b407..74ddceea9dd 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include "Poco/Channel.h" diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index 9068e56a93c..756e427c5f5 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -19,6 +19,7 @@ #include +#include #include "Poco/Foundation.h" #include "Poco/Timestamp.h" diff --git a/cmake/target.cmake b/cmake/target.cmake index d6c497955f6..3d0ecd032f9 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") + message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}") endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 08f58335d16..90ae5981a21 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c) add_contrib (libssh-cmake libssh) +add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..." 
as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index abde20addaf..250b47b7c2c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) -configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) diff --git a/contrib/azure b/contrib/azure index 6262a76ef4c..92c94d7f37a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd +Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf diff --git a/contrib/google-protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake index 3e30b4e40fd..0731a81aeb8 100644 --- a/contrib/google-protobuf-cmake/protobuf_generate.cmake +++ b/contrib/google-protobuf-cmake/protobuf_generate.cmake @@ -157,15 +157,13 @@ function(protobuf_generate) set(_generated_srcs_all) foreach(_proto ${protobuf_generate_PROTOS}) - get_filename_component(_abs_file ${_proto} ABSOLUTE) - get_filename_component(_abs_dir ${_abs_file} DIRECTORY) - get_filename_component(_basename ${_proto} NAME_WE) - file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir}) - - set(_possible_rel_dir) - if (NOT protobuf_generate_APPEND_PATH) - set(_possible_rel_dir ${_rel_dir}/) - endif() + # The protobuf compiler doesn't return paths to the files it generates, so we have to calculate those paths here: + # _abs_file - absolute path to a .proto file, + # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS, + # _basename - filename of the .proto file (without path and without extension). + get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path}) + get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path}) + get_filename_component(_basename "${_abs_file}" NAME_WE) set(_generated_srcs) foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS}) @@ -173,7 +171,7 @@ function(protobuf_generate) endforeach() if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp) - set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc") + set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc") set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}") list(APPEND _generated_srcs ${_descriptor_file}) endif() @@ -196,3 +194,36 @@ function(protobuf_generate) target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all}) endif() endfunction() + +# Calculates the absolute path to a .proto file. 
+function(get_proto_absolute_path result proto) + cmake_path(IS_ABSOLUTE proto _is_abs_path) + if(_is_abs_path) + set(${result} "${proto}" PARENT_SCOPE) + return() + endif() + foreach(_include_dir ${ARGN}) + if(EXISTS "${_include_dir}/${proto}") + set(${result} "${_include_dir}/${proto}" PARENT_SCOPE) + return() + endif() + endforeach() + message(SEND_ERROR "Cannot find .proto file ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() + +# Calculates a relative path to a .proto file. The returned path is relative to one of the include directories. +function(get_proto_relative_path result abs_path) + set(${result} "" PARENT_SCOPE) + get_filename_component(_abs_dir "${abs_path}" DIRECTORY) + foreach(_include_dir ${ARGN}) + cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix) + if(_is_prefix) + file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}") + if(NOT _rel_dir STREQUAL "") + set(${result} "${_rel_dir}/" PARENT_SCOPE) + endif() + return() + endif() + endforeach() + message(WARNING "Cannot find .proto file ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}") +endfunction() diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index a54bd8c1de2..0a650f2bcc0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ else () endif () if (NOT ENABLE_ICU) - message(STATUS "Not using icu") + message(STATUS "Not using ICU") return() endif() diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 023fdcf103a..38ebcc8f680 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,7 +34,11 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") + else() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") + endif() else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() diff --git a/contrib/orc b/contrib/orc index 947cebaf943..bcc025c0982 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f +Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f diff --git a/contrib/prometheus-protobufs-cmake/CMakeLists.txt b/contrib/prometheus-protobufs-cmake/CMakeLists.txt new file mode 100644 index 00000000000..8c939902be7 --- /dev/null +++ b/contrib/prometheus-protobufs-cmake/CMakeLists.txt @@ -0,0 +1,34 @@ +option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF}) + +if(NOT ENABLE_PROMETHEUS_PROTOBUFS) + message(STATUS "Not using prometheus-protobufs") + return() +endif() + +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") +set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs") +set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo") + +# Protobuf_IMPORT_DIRS specifies where the protobuf compiler will look for .proto files. 
+set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS}) +list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}") + +PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers + "prompb/remote.proto" + "prompb/types.proto" + "gogoproto/gogo.proto" +) + +set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS}) + +# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") + +# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files. +set (CMAKE_CXX_CLANG_TIDY "") + +add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers}) +target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") +target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf) + +add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs) diff --git a/contrib/prometheus-protobufs-gogo/LICENSE b/contrib/prometheus-protobufs-gogo/LICENSE new file mode 100644 index 00000000000..16be18e5c50 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/LICENSE @@ -0,0 +1,35 @@ +Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved. +Copyright (c) 2013, The GoGo Authors. All rights reserved. + +Protocol Buffers for Go with Gadgets + +Go support for Protocol Buffers - Google's data interchange format + +Copyright 2010 The Go Authors. All rights reserved. +https://github.com/golang/protobuf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/prometheus-protobufs-gogo/README b/contrib/prometheus-protobufs-gogo/README new file mode 100644 index 00000000000..c40bc42df66 --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/README @@ -0,0 +1,4 @@ +File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project: +https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto + +File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs. 
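Note on the import-path handling above: the helpers added to protobuf_generate.cmake resolve each .proto file against Protobuf_IMPORT_DIRS and preserve its directory prefix in the generated output tree. Below is a minimal sketch of the resulting usage pattern, mirroring the CMakeLists.txt above; the `example_*` names are illustrative only, and the output paths assume the default protoc output directory (the current binary dir):

```cmake
# Sketch: with the import directory appended, "prompb/remote.proto" is resolved
# by get_proto_absolute_path() against contrib/prometheus-protobufs, and
# get_proto_relative_path() keeps the "prompb/" prefix, so the generated files
# land at ${CMAKE_CURRENT_BINARY_DIR}/prompb/remote.pb.h and .pb.cc.
list(APPEND Protobuf_IMPORT_DIRS "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs")
PROTOBUF_GENERATE_CPP(example_srcs example_hdrs "prompb/remote.proto")

# Consumers then add the binary dir to the include path so that
# #include <prompb/remote.pb.h> works, as done for _prometheus_protobufs below.
add_library(_example_protos ${example_srcs} ${example_hdrs})
target_include_directories(_example_protos SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
```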
diff --git a/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto new file mode 100644 index 00000000000..974b36a7ccd --- /dev/null +++ b/contrib/prometheus-protobufs-gogo/gogoproto/gogo.proto @@ -0,0 +1,145 @@ +// Protocol Buffers for Go with Gadgets +// +// Copyright (c) 2013, The GoGo Authors. All rights reserved. +// http://github.com/cosmos/gogoproto +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto2"; +package gogoproto; + +import "google/protobuf/descriptor.proto"; + +option java_package = "com.google.protobuf"; +option java_outer_classname = "GoGoProtos"; +option go_package = "github.com/cosmos/gogoproto/gogoproto"; + +extend google.protobuf.EnumOptions { + optional bool goproto_enum_prefix = 62001; + optional bool goproto_enum_stringer = 62021; + optional bool enum_stringer = 62022; + optional string enum_customname = 62023; + optional bool enumdecl = 62024; +} + +extend google.protobuf.EnumValueOptions { + optional string enumvalue_customname = 66001; +} + +extend google.protobuf.FileOptions { + optional bool goproto_getters_all = 63001; + optional bool goproto_enum_prefix_all = 63002; + optional bool goproto_stringer_all = 63003; + optional bool verbose_equal_all = 63004; + optional bool face_all = 63005; + optional bool gostring_all = 63006; + optional bool populate_all = 63007; + optional bool stringer_all = 63008; + optional bool onlyone_all = 63009; + + optional bool equal_all = 63013; + optional bool description_all = 63014; + optional bool testgen_all = 63015; + optional bool benchgen_all = 63016; + optional bool marshaler_all = 63017; + optional bool unmarshaler_all = 63018; + optional bool stable_marshaler_all = 63019; + + optional bool sizer_all = 63020; + + optional bool goproto_enum_stringer_all = 63021; + optional bool enum_stringer_all = 63022; + + optional bool unsafe_marshaler_all = 63023; + optional bool unsafe_unmarshaler_all = 63024; + + optional bool goproto_extensions_map_all = 63025; + optional bool goproto_unrecognized_all = 63026; + optional bool gogoproto_import = 63027; + optional bool protosizer_all = 63028; + optional bool compare_all = 63029; + optional bool typedecl_all = 63030; + optional bool enumdecl_all = 63031; + + 
optional bool goproto_registration = 63032; + optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 63035; +} + +extend google.protobuf.MessageOptions { + optional bool goproto_getters = 64001; + optional bool goproto_stringer = 64003; + optional bool verbose_equal = 64004; + optional bool face = 64005; + optional bool gostring = 64006; + optional bool populate = 64007; + optional bool stringer = 67008; + optional bool onlyone = 64009; + + optional bool equal = 64013; + optional bool description = 64014; + optional bool testgen = 64015; + optional bool benchgen = 64016; + optional bool marshaler = 64017; + optional bool unmarshaler = 64018; + optional bool stable_marshaler = 64019; + + optional bool sizer = 64020; + + optional bool unsafe_marshaler = 64023; + optional bool unsafe_unmarshaler = 64024; + + optional bool goproto_extensions_map = 64025; + optional bool goproto_unrecognized = 64026; + + optional bool protosizer = 64028; + optional bool compare = 64029; + + optional bool typedecl = 64030; + + optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed = 64035; +} + +extend google.protobuf.FieldOptions { + optional bool nullable = 65001; + optional bool embed = 65002; + optional string customtype = 65003; + optional string customname = 65004; + optional string jsontag = 65005; + optional string moretags = 65006; + optional string casttype = 65007; + optional string castkey = 65008; + optional string castvalue = 65009; + + optional bool stdtime = 65010; + optional bool stdduration = 65011; + optional bool wktpointer = 65012; + + optional string castrepeated = 65013; +} diff --git a/contrib/prometheus-protobufs/LICENSE b/contrib/prometheus-protobufs/LICENSE new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/contrib/prometheus-protobufs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/prometheus-protobufs/README b/contrib/prometheus-protobufs/README new file mode 100644 index 00000000000..c557e59bb93 --- /dev/null +++ b/contrib/prometheus-protobufs/README @@ -0,0 +1,2 @@ +Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository: +https://github.com/prometheus/prometheus/tree/main/prompb diff --git a/contrib/prometheus-protobufs/prompb/remote.proto b/contrib/prometheus-protobufs/prompb/remote.proto new file mode 100644 index 00000000000..50bb25e7fac --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/remote.proto @@ -0,0 +1,88 @@ +// Copyright 2016 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "prompb/types.proto"; +import "gogoproto/gogo.proto"; + +message WriteRequest { + repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false]; + // Cortex uses this field to determine the source of the write request. + // We reserve it to avoid any compatibility issues. + reserved 2; + repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false]; +} + +// ReadRequest represents a remote read request. +message ReadRequest { + repeated Query queries = 1; + + enum ResponseType { + // Server will return a single ReadResponse message with matched series that includes list of raw samples. + // It's recommended to use streamed response types instead. + // + // Response headers: + // Content-Type: "application/x-protobuf" + // Content-Encoding: "snappy" + SAMPLES = 0; + // Server will stream a delimited ChunkedReadResponse message that + // contains XOR or HISTOGRAM(!) encoded chunks for a single series. + // Each message is following varint size and fixed size bigendian + // uint32 for CRC32 Castagnoli checksum. + // + // Response headers: + // Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" + // Content-Encoding: "" + STREAMED_XOR_CHUNKS = 1; + } + + // accepted_response_types allows negotiating the content type of the response. + // + // Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is + // implemented by server, error is returned. + // For request that do not contain `accepted_response_types` field the SAMPLES response type will be used. + repeated ResponseType accepted_response_types = 2; +} + +// ReadResponse is a response when response_type equals SAMPLES. +message ReadResponse { + // In same order as the request's queries. + repeated QueryResult results = 1; +} + +message Query { + int64 start_timestamp_ms = 1; + int64 end_timestamp_ms = 2; + repeated prometheus.LabelMatcher matchers = 3; + prometheus.ReadHints hints = 4; +} + +message QueryResult { + // Samples within a time series must be ordered by time. + repeated prometheus.TimeSeries timeseries = 1; +} + +// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS. +// We strictly stream full series after series, optionally split by time. This means that a single frame can contain +// partition of the single series, but once a new series is started to be streamed it means that no more chunks will +// be sent for previous one. Series are returned sorted in the same way TSDB block are internally. +message ChunkedReadResponse { + repeated prometheus.ChunkedSeries chunked_series = 1; + + // query_index represents an index of the query from ReadRequest.queries these chunks relates to. 
+ int64 query_index = 2; +} diff --git a/contrib/prometheus-protobufs/prompb/types.proto b/contrib/prometheus-protobufs/prompb/types.proto new file mode 100644 index 00000000000..61fc1e0143e --- /dev/null +++ b/contrib/prometheus-protobufs/prompb/types.proto @@ -0,0 +1,187 @@ +// Copyright 2017 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package prometheus; + +option go_package = "prompb"; + +import "gogoproto/gogo.proto"; + +message MetricMetadata { + enum MetricType { + UNKNOWN = 0; + COUNTER = 1; + GAUGE = 2; + HISTOGRAM = 3; + GAUGEHISTOGRAM = 4; + SUMMARY = 5; + INFO = 6; + STATESET = 7; + } + + // Represents the metric type, these match the set from Prometheus. + // Refer to github.com/prometheus/common/model/metadata.go for details. + MetricType type = 1; + string metric_family_name = 2; + string help = 4; + string unit = 5; +} + +message Sample { + double value = 1; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 2; +} + +message Exemplar { + // Optional, can be empty. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + double value = 2; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 3; +} + +// A native histogram, also known as a sparse histogram. +// Original design doc: +// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit +// The appendix of this design doc also explains the concept of float +// histograms. This Histogram message can represent both, the usual +// integer histogram as well as a float histogram. +message Histogram { + enum ResetHint { + UNKNOWN = 0; // Need to test for a counter reset explicitly. + YES = 1; // This is the 1st histogram after a counter reset. + NO = 2; // There was no counter reset between this and the previous Histogram. + GAUGE = 3; // This is a gauge histogram where counter resets don't happen. + } + + oneof count { // Count of observations in the histogram. + uint64 count_int = 1; + double count_float = 2; + } + double sum = 3; // Sum of observations in the histogram. + // The schema defines the bucket schema. Currently, valid numbers + // are -4 <= n <= 8. They are all for base-2 bucket schemas, where 1 + // is a bucket boundary in each case, and then each power of two is + // divided into 2^n logarithmic buckets. Or in other words, each + // bucket boundary is the previous boundary times 2^(2^-n). In the + // future, more bucket schemas may be added using numbers < -4 or > + // 8. + sint32 schema = 4; + double zero_threshold = 5; // Breadth of the zero bucket. + oneof zero_count { // Count in zero bucket. + uint64 zero_count_int = 6; + double zero_count_float = 7; + } + + // Negative Buckets. 
+ repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false]; + // Use either "negative_deltas" or "negative_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double negative_counts = 10; // Absolute count of each bucket. + + // Positive Buckets. + repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false]; + // Use either "positive_deltas" or "positive_counts", the former for + // regular histograms with integer counts, the latter for float + // histograms. + repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket). + repeated double positive_counts = 13; // Absolute count of each bucket. + + ResetHint reset_hint = 14; + // timestamp is in ms format, see model/timestamp/timestamp.go for + // conversion from time.Time to Prometheus timestamp. + int64 timestamp = 15; +} + +// A BucketSpan defines a number of consecutive buckets with their +// offset. Logically, it would be more straightforward to include the +// bucket counts in the Span. However, the protobuf representation is +// more compact in the way the data is structured here (with all the +// buckets in a single array separate from the Spans). +message BucketSpan { + sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative). + uint32 length = 2; // Length of consecutive buckets. +} + +// TimeSeries represents samples and labels for a single time series. +message TimeSeries { + // For a timeseries to be valid, and for the samples and exemplars + // to be ingested by the remote system properly, the labels field is required. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + repeated Sample samples = 2 [(gogoproto.nullable) = false]; + repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false]; + repeated Histogram histograms = 4 [(gogoproto.nullable) = false]; +} + +message Label { + string name = 1; + string value = 2; +} + +message Labels { + repeated Label labels = 1 [(gogoproto.nullable) = false]; +} + +// Matcher specifies a rule, which can match or set of labels or not. +message LabelMatcher { + enum Type { + EQ = 0; + NEQ = 1; + RE = 2; + NRE = 3; + } + Type type = 1; + string name = 2; + string value = 3; +} + +message ReadHints { + int64 step_ms = 1; // Query step size in milliseconds. + string func = 2; // String representation of surrounding function or aggregation. + int64 start_ms = 3; // Start time in milliseconds. + int64 end_ms = 4; // End time in milliseconds. + repeated string grouping = 5; // List of label names used in aggregation. + bool by = 6; // Indicate whether it is without or by. + int64 range_ms = 7; // Range vector selector range in milliseconds. +} + +// Chunk represents a TSDB chunk. +// Time range [min, max] is inclusive. +message Chunk { + int64 min_time_ms = 1; + int64 max_time_ms = 2; + + // We require this to match chunkenc.Encoding. + enum Encoding { + UNKNOWN = 0; + XOR = 1; + HISTOGRAM = 2; + FLOAT_HISTOGRAM = 3; + } + Encoding type = 3; + bytes data = 4; +} + +// ChunkedSeries represents single, encoded time series. +message ChunkedSeries { + // Labels should be sorted. + repeated Label labels = 1 [(gogoproto.nullable) = false]; + // Chunks will be in start time order and may overlap. 
+ repeated Chunk chunks = 2 [(gogoproto.nullable) = false]; +} diff --git a/contrib/s2geometry b/contrib/s2geometry index 0547c383717..6522a40338d 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b +Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 6632f9c27d5..48562b8cead 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) +option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES}) if (NOT ENABLE_S2_GEOMETRY) - message(STATUS "Not using S2 geometry") + message(STATUS "Not using S2 Geometry") return() endif() @@ -38,6 +38,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2cell_index.cc" "${S2_SOURCE_DIR}/s2/s2cell_union.cc" "${S2_SOURCE_DIR}/s2/s2centroids.cc" + "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc" @@ -46,6 +47,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2coords.cc" "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2debug.cc" + "${S2_SOURCE_DIR}/s2/s2density_tree.cc" "${S2_SOURCE_DIR}/s2/s2earth.cc" "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc" "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc" @@ -53,8 +55,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" "${S2_SOURCE_DIR}/s2/s2error.cc" + "${S2_SOURCE_DIR}/s2/s2fractal.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc" + "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -63,10 +67,10 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc" "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" + "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" - "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2padded_cell.cc" "${S2_SOURCE_DIR}/s2/s2point_compression.cc" @@ -80,10 +84,11 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2predicates.cc" "${S2_SOURCE_DIR}/s2/s2projections.cc" "${S2_SOURCE_DIR}/s2/s2r2rect.cc" - "${S2_SOURCE_DIR}/s2/s2region.cc" - "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" + "${S2_SOURCE_DIR}/s2/s2random.cc" "${S2_SOURCE_DIR}/s2/s2region_coverer.cc" "${S2_SOURCE_DIR}/s2/s2region_intersection.cc" + "${S2_SOURCE_DIR}/s2/s2region_sharder.cc" + "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc" "${S2_SOURCE_DIR}/s2/s2region_union.cc" "${S2_SOURCE_DIR}/s2/s2shape_index.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" @@ -94,9 +99,12 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" + "${S2_SOURCE_DIR}/s2/s2testing.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" 
"${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" @@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE absl::strings absl::type_traits absl::utility + absl::vlog_is_on ) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") diff --git a/contrib/vectorscan b/contrib/vectorscan index 4918f81ea3d..d29730e1cb9 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 4918f81ea3d1abd18905bac9876d4a1fe2ebdf07 +Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809 diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index d6c626c1612..35d5fd3dc82 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,11 +1,8 @@ -# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. - +# Vectorscan is drop-in replacement for Hyperscan. if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64) - option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) + option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES}) endif() -# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine - if (NOT ENABLE_VECTORSCAN) message (STATUS "Not using vectorscan") return() @@ -272,34 +269,24 @@ if (ARCH_AARCH64) ) endif() -# TODO -# if (ARCH_PPC64LE) -# list(APPEND SRCS -# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp" -# ) -# endif() - add_library (_vectorscan ${SRCS}) -target_compile_options (_vectorscan PRIVATE - -fno-sanitize=undefined # assume the library takes care of itself - -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system -) # library has too much debug information if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options (_vectorscan PRIVATE -g0) endif() -# Include version header manually generated by running the original build system -target_include_directories (_vectorscan SYSTEM PRIVATE common) +target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") + +# Makes the version header visible. It was generated by running the native build system manually. +# Please update whenever you update vectorscan. +target_include_directories (_vectorscan SYSTEM PUBLIC common) # vectorscan inherited some patched in-source versions of boost headers to fix a bug in # boost 1.69. This bug has been solved long ago but vectorscan's source code still # points to the patched versions, so include it here. target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include") -target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src") - # Include platform-specific config header generated by manually running the original build system # Please regenerate these files if you update vectorscan. diff --git a/contrib/vectorscan-cmake/common/hs_version.h b/contrib/vectorscan-cmake/common/hs_version.h index 8315b44fb2a..3d266484095 100644 --- a/contrib/vectorscan-cmake/common/hs_version.h +++ b/contrib/vectorscan-cmake/common/hs_version.h @@ -32,8 +32,12 @@ /** * A version string to identify this release of Hyperscan. 
*/ -#define HS_VERSION_STRING "5.4.7 2022-06-20" +#define HS_VERSION_STRING "5.4.11 2024-07-04" #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0) +#define HS_MAJOR 5 +#define HS_MINOR 4 +#define HS_PATCH 11 + #endif /* HS_VERSION_H_C6428FAF8E3713 */ diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 018fe57bf56..c59ef1b919a 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py new file mode 100644 index 00000000000..6c1d89ac0ac --- /dev/null +++ b/docker/reqgenerator.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# To run this script you must install docker and the pipdeptree python package +# + +import subprocess +import os +import sys + + +def build_docker_deps(image_name, imagedir): + cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" + subprocess.check_call(cmd, shell=True) + + +def check_docker_file_install_with_pip(filepath): + image_name = None + with open(filepath, "r") as f: + for line in f: + if "docker build" in line: + arr = line.split(" ") + if len(arr) > 4: + image_name = arr[4] + if "pip3 install" in line or "pip install" in line: + return image_name, True + return image_name, False + + +def process_affected_images(images_dir): + for root, _dirs, files in os.walk(images_dir): + for f in files: + if f == "Dockerfile": + docker_file_path = os.path.join(root, f) + print("Checking image on path", docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip( + docker_file_path + ) + if has_pip: + print("Found pip in", image_name) + try: + build_docker_deps(image_name, root) + except Exception as ex: + print(ex) + else: + print("Pip not found in", docker_file_path) + + +process_affected_images(sys.argv[1]) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a86406e5129..240df79aeb1 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 25f3273a648..ac64655991a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e0be261d5e8..2512268be0f 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,10 
+19,7 @@ RUN apt-get update \ odbcinst \ psmisc \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ unixodbc \ pv \ jq \ @@ -31,7 +28,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +# LLVM changed the paths for compiler-rt libraries; for some reason clang-18.1.8 cannot pick up the libraries from the default install path. +# It's a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use them. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu + ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ && cd /tmp/ccache \ diff --git a/docker/test/fasttest/requirements.txt b/docker/test/fasttest/requirements.txt new file mode 100644 index 00000000000..993ea22e5ae --- /dev/null +++ b/docker/test/fasttest/requirements.txt @@ -0,0 +1,41 @@ +Jinja2==3.1.3 +MarkupSafe==2.1.5 +PyJWT==2.3.0 +PyYAML==6.0.1 +Pygments==2.11.2 +SecretStorage==3.3.1 +blinker==1.4 +certifi==2020.6.20 +chardet==4.0.0 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +requests==2.32.3 +scipy==1.12.0 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4d5159cfa9e..c015d3a3542 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -84,6 +84,8 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" } +export -f start_server + function clone_root { [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" @@ -254,6 +256,19 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + function run_tests { clickhouse-server --version @@ -292,6 +307,8 @@ function run_tests clickhouse stop --pid-path "$FASTTEST_DATA" } +export -f run_tests + case "$stage" in "") ls -la @@ -315,7 +332,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - run_tests + timeout_with_logging 35m bash -c run_tests ||: /process_functional_tests_result.py --in-results-dir 
"$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index d3f78ac1d95..e1fb09b8ed5 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -31,7 +31,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/fuzzer/requirements.txt b/docker/test/fuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/fuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 270b40e23a6..469251f648c 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -33,7 +33,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install pycurl +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/integration/base/requirements.txt b/docker/test/integration/base/requirements.txt new file mode 100644 index 00000000000..d195d8deaf6 --- /dev/null +++ b/docker/test/integration/base/requirements.txt @@ -0,0 +1,26 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +pycurl==7.45.3 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index 01b9b777614..b35a7262651 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -2,4 +2,5 @@ # Helper docker container to run python bottle apps FROM python:3 -RUN python -m pip install bottle +COPY requirements.txt / +RUN python -m pip install --no-cache-dir -r requirements.txt diff --git a/docker/test/integration/resolver/requirements.txt b/docker/test/integration/resolver/requirements.txt new file mode 100644 index 00000000000..fbf85295329 --- /dev/null +++ b/docker/test/integration/resolver/requirements.txt @@ -0,0 +1,6 @@ +bottle==0.12.25 +packaging==24.1 +pip==23.2.1 +pipdeptree==2.23.0 +setuptools==69.0.3 +wheel==0.42.0 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 23d8a37d822..d250b746e7d 100644 --- 
a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update \ libicu-dev \ bsdutils \ curl \ - python3-pika \ liblua5.1-dev \ luajit \ libssl-dev \ @@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ # kazoo 2.10.0 is broken # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html -RUN python3 -m pip install --no-cache-dir \ - PyMySQL==1.1.0 \ - asyncio==3.4.3 \ - avro==1.10.2 \ - azure-storage-blob==12.19.0 \ - boto3==1.34.24 \ - cassandra-driver==3.29.0 \ - confluent-kafka==2.3.0 \ - delta-spark==2.3.0 \ - dict2xml==1.7.4 \ - dicttoxml==1.7.16 \ - docker==6.1.3 \ - docker-compose==1.29.2 \ - grpcio==1.60.0 \ - grpcio-tools==1.60.0 \ - kafka-python==2.0.2 \ - lz4==4.3.3 \ - minio==7.2.3 \ - nats-py==2.6.0 \ - protobuf==4.25.2 \ - kazoo==2.9.0 \ - psycopg2-binary==2.9.6 \ - pyhdfs==0.3.1 \ - pymongo==3.11.0 \ - pyspark==3.3.2 \ - pytest==7.4.4 \ - pytest-order==1.0.0 \ - pytest-random==0.2 \ - pytest-repeat==0.9.3 \ - pytest-timeout==2.2.0 \ - pytest-xdist==3.5.0 \ - pytest-reportlog==0.4.0 \ - pytz==2023.3.post1 \ - pyyaml==5.3.1 \ - redis==5.0.1 \ - requests-kerberos==0.14.0 \ - tzlocal==2.1 \ - retry==0.9.2 \ - bs4==0.0.2 \ - lxml==5.1.0 \ - urllib3==2.0.7 \ - jwcrypto==1.5.6 -# bs4, lxml are for cloud tests, do not delete +COPY requirements.txt / +RUN python3 -m pip install --no-cache-dir -r requirements.txt # Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt new file mode 100644 index 00000000000..8a77d8abf77 --- /dev/null +++ b/docker/test/integration/runner/requirements.txt @@ -0,0 +1,113 @@ +PyHDFS==0.3.1 +PyJWT==2.3.0 +PyMySQL==1.1.0 +PyNaCl==1.5.0 +PyYAML==5.3.1 +SecretStorage==3.3.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +async-timeout==4.0.3 +asyncio==3.4.3 +attrs==23.2.0 +avro==1.10.2 +azure-core==1.30.1 +azure-storage-blob==12.19.0 +bcrypt==4.1.3 +beautifulsoup4==4.12.3 +blinker==1.4 +boto3==1.34.24 +botocore==1.34.101 +bs4==0.0.2 +cassandra-driver==3.29.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +confluent-kafka==2.3.0 +cryptography==3.4.8 +dbus-python==1.2.18 +decorator==5.1.1 +delta-spark==2.3.0 +dict2xml==1.7.4 +dicttoxml==1.7.16 +distro-info==1.1+ubuntu0.2 +distro==1.7.0 +docker-compose==1.29.2 +docker==6.1.3 +dockerpty==0.4.1 +docopt==0.6.2 +exceptiongroup==1.2.1 +execnet==2.1.1 +geomet==0.2.1.post1 +grpcio-tools==1.60.0 +grpcio==1.60.0 +gssapi==1.8.3 +httplib2==0.20.2 +idna==3.7 +importlib-metadata==4.6.4 +iniconfig==2.0.0 +isodate==0.6.1 +jeepney==0.7.1 +jmespath==1.0.1 +jsonschema==3.2.0 +jwcrypto==1.5.6 +kafka-python==2.0.2 +kazoo==2.9.0 +keyring==23.5.0 +krb5==0.5.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.1.0 +lz4==4.3.3 +minio==7.2.3 +more-itertools==8.10.0 +nats-py==2.6.0 +oauthlib==3.2.0 +packaging==24.0 +paramiko==3.4.0 +pika==1.2.0 +pip==24.1.1 +pipdeptree==2.23.0 +pluggy==1.5.0 +protobuf==4.25.2 +psycopg2-binary==2.9.6 +py4j==0.10.9.5 +py==1.11.0 +pycparser==2.22 +pycryptodome==3.20.0 +pymongo==3.11.0 +pyparsing==2.4.7 +pyrsistent==0.20.0 +pyspark==3.3.2 +pyspnego==0.10.2 +pytest-order==1.0.0 +pytest-random==0.2 +pytest-repeat==0.9.3 +pytest-reportlog==0.4.0 +pytest-timeout==2.2.0 
+pytest-xdist==3.5.0 +pytest==7.4.4 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +python-dotenv==0.21.1 +pytz==2023.3.post1 +redis==5.0.1 +requests-kerberos==0.14.0 +requests==2.31.0 +retry==0.9.2 +s3transfer==0.10.1 +setuptools==59.6.0 +simplejson==3.19.2 +six==1.16.0 +soupsieve==2.5 +texttable==1.7.0 +tomli==2.0.1 +typing_extensions==4.11.0 +tzlocal==2.1 +unattended-upgrades==0.1 +urllib3==2.0.7 +wadllib==1.3.6 +websocket-client==0.59.0 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index c9802a0e44e..e6eb2ae336e 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -1,3 +1,4 @@ +# docker build -t clickhouse/libfuzzer . ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG @@ -29,7 +30,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/libfuzzer/requirements.txt b/docker/test/libfuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/libfuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1835900b316..c68a39f6f70 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -23,7 +23,6 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ - python3-setuptools \ rsync \ tree \ tzdata \ @@ -33,12 +32,14 @@ RUN apt-get update \ cargo \ ripgrep \ zstd \ - && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +COPY requirements.txt / +RUN pip3 --no-cache-dir install -r requirements.txt + COPY run.sh / CMD ["bash", "/run.sh"] diff --git a/docker/test/performance-comparison/requirements.txt b/docker/test/performance-comparison/requirements.txt new file mode 100644 index 00000000000..932527cc022 --- /dev/null +++ b/docker/test/performance-comparison/requirements.txt @@ -0,0 +1,32 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +Pygments==2.11.2 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2023.4 +PyYAML==6.0.1 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==2.1 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1ea1e52e6fa..1425e12cd84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ 
-18,11 +18,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - numpy \ - pyodbc \ - deepdiff \ - sqlglot +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" diff --git a/docker/test/sqllogic/requirements.txt b/docker/test/sqllogic/requirements.txt new file mode 100644 index 00000000000..abc0a368659 --- /dev/null +++ b/docker/test/sqllogic/requirements.txt @@ -0,0 +1,30 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +deepdiff==7.0.1 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.4 +oauthlib==3.2.0 +ordered-set==4.1.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyodbc==5.1.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +sqlglot==23.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 7f59f65761f..71d915b0c7a 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -14,9 +14,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - pyyaml \ - clickhouse-driver +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG sqltest_repo="https://github.com/elliotchance/sqltest/" diff --git a/docker/test/sqltest/requirements.txt b/docker/test/sqltest/requirements.txt new file mode 100644 index 00000000000..4a0ae3edbac --- /dev/null +++ b/docker/test/sqltest/requirements.txt @@ -0,0 +1,29 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2024.1 +PyYAML==6.0.1 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==5.2 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 355e70f180e..0daf88cad7e 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests \ nodejs \ npm \ && apt-get clean \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index c3d80a7334b..5a655a3fd2b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,10 +25,7 @@ RUN apt-get update -y \ openssl \ postgresql-client \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ qemu-user-static \ sqlite3 \ sudo \ @@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 +COPY requirements.txt / 
+RUN pip3 install --no-cache-dir -r /requirements.txt RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt new file mode 100644 index 00000000000..3284107e24e --- /dev/null +++ b/docker/test/stateless/requirements.txt @@ -0,0 +1,51 @@ +awscli==1.22.34 +blinker==1.4 +botocore==1.23.34 +certifi==2020.6.20 +chardet==4.0.0 +colorama==0.4.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +docutils==0.17.1 +gyp==0.1 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.3 +jmespath==0.10.0 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyarrow==15.0.0 +pyasn1==0.4.8 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.8.1 +pytz==2024.1 +PyYAML==6.0.1 +requests==2.32.3 +roman==3.3 +rsa==4.8 +s3transfer==0.5.0 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5ece9743498..029c5a03151 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,6 +6,9 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a +MAX_RUN_TIME=${MAX_RUN_TIME:-10800} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well @@ -262,14 +265,17 @@ function run_tests() export -f run_tests + +# This should be enough to setup job and collect artifacts +TIMEOUT=$((MAX_RUN_TIME - 300)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
- timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 9b6ab535a90..833e1a05384 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -38,7 +38,7 @@ function fn_exists() { function timeout_with_logging() { local exit_code=0 - timeout "${@}" || exit_code="${?}" + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" if [[ "${exit_code}" -eq "124" ]] then diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7cd712b73f6..cdc1d1fa095 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install \ - PyGithub \ - black==23.12.0 \ - boto3 \ - codespell==2.2.1 \ - mypy==1.8.0 \ - pylint==3.1.0 \ - python-magic==0.4.24 \ - flake8==4.0.1 \ - requests \ - thefuzz \ - tqdm==4.66.4 \ - types-requests \ - unidiff \ - jwt \ - && rm -rf /root/.cache/pip +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt new file mode 100644 index 00000000000..bb0cd55dd1a --- /dev/null +++ b/docker/test/style/requirements.txt @@ -0,0 +1,58 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +astroid==3.1.0 +async-timeout==4.0.3 +attrs==23.2.0 +black==23.12.0 +boto3==1.34.131 +botocore==1.34.131 +certifi==2024.6.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +codespell==2.2.1 +cryptography==42.0.8 +Deprecated==1.2.14 +dill==0.3.8 +flake8==4.0.1 +frozenlist==1.4.1 +idna==3.7 +isort==5.13.2 +jmespath==1.0.1 +jwt==1.3.1 +mccabe==0.6.1 +multidict==6.0.5 +mypy==1.8.0 +mypy-extensions==1.0.0 +packaging==24.1 +pathspec==0.9.0 +pip==24.1.1 +pipdeptree==2.23.0 +platformdirs==4.2.2 +pycodestyle==2.8.0 +pycparser==2.22 +pyflakes==2.4.0 +PyGithub==2.3.0 +PyJWT==2.8.0 +pylint==3.1.0 +PyNaCl==1.5.0 +python-dateutil==2.9.0.post0 +python-magic==0.4.24 +PyYAML==6.0.1 +rapidfuzz==3.9.3 +requests==2.32.3 +s3transfer==0.10.1 +setuptools==59.6.0 +six==1.16.0 +thefuzz==0.22.1 +tomli==2.0.1 +tomlkit==0.12.5 +tqdm==4.66.4 +types-requests==2.32.0.20240622 +typing_extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +wheel==0.37.1 +wrapt==1.16.0 +yamllint==1.26.3 +yarl==1.9.4 diff --git a/docs/changelogs/v24.6.2.17-stable.md b/docs/changelogs/v24.6.2.17-stable.md new file mode 100644 index 00000000000..820937f6291 --- /dev/null +++ b/docs/changelogs/v24.6.2.17-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478) + +#### New Feature +* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). 
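Both `timeout_with_logging` helpers in this patch (the new one in `docker/test/fasttest/run.sh` and the fixed one in `docker/test/stateless/utils.lib`) wrap GNU coreutils `timeout` the same way. A minimal self-contained sketch of the pattern follows; the `run_tests` stub and the 5-second limit are invented for illustration:

```bash
#!/usr/bin/env bash
# Sketch of the timeout_with_logging pattern used in this patch.

function timeout_with_logging() {
    local exit_code=0

    # `timeout` sends SIGTERM by default, so `-s TERM` only makes that explicit;
    # `--preserve-status` makes `timeout` exit with the wrapped command's own
    # status instead of the usual 124 on expiry.
    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

    if [[ "${exit_code}" -eq "124" ]]
    then
        echo "The command 'timeout ${*}' has been killed by timeout"
    fi

    return $exit_code
}

# Stub standing in for the real test runner; exported so that `bash -c run_tests`
# can see it, mirroring the `export -f run_tests` in the scripts above.
function run_tests() { sleep 10; echo "tests finished"; }
export -f run_tests

timeout_with_logging 5s bash -c run_tests ||:
```

One consequence worth noting: with `--preserve-status`, a command killed on expiry by SIGTERM normally reports status 143 (128 + 15) rather than 124, so the log line above only fires when the wrapped command itself exits with 124.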
+ +#### Improvement +* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 46c24ad8491..fc861e25e9f 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`. - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. 
For example, `STANDARD`
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9879ee35612..22c8c704ba2 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,6 +974,13 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting +## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} + +Enables compact mode for binary serialization of discriminators in the Variant data type. +This mode makes it possible to use significantly less memory to store discriminators in parts when there is mostly one variant or a lot of NULL values. + +Default value: true + ## merge_workload Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md new file mode 100644 index 00000000000..91aa4772bcf --- /dev/null +++ b/docs/en/operations/startup-scripts.md @@ -0,0 +1,30 @@ +--- +slug: /en/operations/startup-scripts +sidebar_label: Startup Scripts +--- + +# Startup Scripts + +ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation. + +```xml
+<clickhouse>
+    <startup_scripts>
+        <scripts>
+            <query>CREATE ROLE OR REPLACE test_role</query>
+        </scripts>
+        <scripts>
+            <query>CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog</query>
+            <condition>SELECT 1;</condition>
+        </scripts>
+    </startup_scripts>
+</clickhouse>
+``` + +ClickHouse executes all queries from `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries is not interrupted. + +You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`. + +:::note +If the condition query returns any value other than `1` or `true`, the result is interpreted as `false`, and the corresponding query won't be executed. +:::
diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 76db9e41836..e22bc06b641 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -4,35 +4,56 @@ sidebar_position: 59 sidebar_label: clickhouse-disks --- -# clickhouse-disks +# Clickhouse-disks -A utility providing filesystem-like operations for ClickHouse disks. +A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and non-interactive modes. -Program-wide options: +## Program-wide options * `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. * `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. +* `--query, -q` -- a single query that can be executed without launching the interactive mode +* `--help, -h` -- print all options and commands with their descriptions + +## Default Disks +After launch, two disks are initialized.
The first one is the disk `local`, which imitates the local file system from which the clickhouse-disks utility was launched. The second one is the disk `default`, which is mounted to the local filesystem in the directory given by the config parameter `clickhouse/path` (the default value is `/var/lib/clickhouse`). + +## Clickhouse-disks state +For each disk that was added, the utility stores the current directory (as in a usual filesystem). The user can change the current directory and switch between disks. + +The state is reflected in the prompt "`disk_name`:`path_name`". ## Commands -* `copy [--disk-from d1] [--disk-to d2] <FROM_PATH> <TO_PATH>`. - Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided) - to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided). -* `move <FROM_PATH> <TO_PATH>`. - Move file or directory from `FROM_PATH` to `TO_PATH`. -* `remove <PATH>`. - Remove `PATH` recursively. -* `link <FROM_PATH> <TO_PATH>`. - Create a hardlink from `FROM_PATH` to `TO_PATH`. -* `list [--recursive] <PATH> ...` - List files at `PATH`s. Non-recursive by default. -* `list-disks`. +In this documentation file, all mandatory positional arguments are referred to as `<parameter_name>`, and named arguments as `[--parameter_name value]`. Any positional parameter can also be passed as a named parameter with the corresponding name. + +* `cd (change-dir, change_dir) [--disk disk] <path>` Change directory to path `path` on disk `disk` (the default is the current disk). No disk switching happens. +* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] <path-from> <path-to>`. Recursively copy data from `path-from` on disk `disk_1` to `path-to` on disk `disk_2` (for both, the default is the current disk, or the `disk` parameter in non-interactive mode). +* `current_disk_with_path (current, current_disk, current_path)` Print the current state in the format: `Disk: "current_disk" Path: "current path on current disk"` +* `help [<command>]` Print a help message about command `command`. If `command` is not specified, print information about all commands. +* `move (mv) <path-from> <path-to>`. Move a file or directory from `path-from` to `path-to` within the current disk. +* `remove (rm, delete) <path>`. Remove `path` recursively on the current disk. +* `link (ln) <path-from> <path-to>`. Create a hardlink from `path-from` to `path-to` on the current disk. +* `list (ls) [--recursive] <path>` List files at `path` on the current disk. Non-recursive by default. +* `list-disks (list_disks, ls-disks, ls_disks)`. List disk names. -* `mkdir [--recursive] <PATH>`. +* `mkdir [--recursive] <path>` Create a directory on the current disk. Non-recursive by default. -* `read: <FROM_PATH> [<TO_PATH>]` - Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied). -* `write [FROM_PATH] <TO_PATH>`. - Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`. +* `read (r) <path-from> [--path-to path]` Read a file from `path-from` to `path` (`stdout` if not supplied). +* `switch-disk [--path path] <disk>` Switch to disk `disk` at path `path` (if `path` is not specified, the default is the previous path on disk `disk`). +* `write (w) [--path-from path] <path-to>`. Write a file from `path` (`stdin` if `path` is not supplied; input must end with Ctrl+D) to `path-to`.
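To make the command reference above concrete, here is a hypothetical interactive session. The disk names `local` and `default` follow the defaults described above, but all paths, the `list-disks` output, and the exact prompt rendering are invented for illustration:

```text
$ clickhouse-disks -C /etc/clickhouse-server/config.xml
default:/ $ list-disks
default
local
default:/ $ cd store
default:/store $ current_disk_with_path
Disk: "default" Path: "/store"
default:/store $ switch-disk --path /tmp local
local:/tmp $ copy --disk-from default /store/data.txt data_copy.txt
local:/tmp $ read data_copy.txt --path-to out.txt
```

Each command here is one documented above: `cd` changes the directory without switching disks, `switch-disk` changes the active disk, and `copy` pulls from another disk via `--disk-from` while resolving its second path against the current disk.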
diff --git a/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md new file mode 100644 index 00000000000..fdbfd5b9e41 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md @@ -0,0 +1,37 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/aggthrow +sidebar_position: 101 +--- + +# aggThrow + +This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability. + +**Syntax** + +```sql +aggThrow(throw_prob) +``` + +**Arguments** + +- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md). + +**Returned value** + +- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`. + +**Example** + +Query: + +```sql +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; +``` + +Result: + +```response +Received exception: +Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW) +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..b0e5582bd87 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -43,6 +43,7 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [aggThrow](../reference/aggthrow.md) - [analysisOfVariance](../reference/analysis_of_variance.md) - [any](../reference/any_respect_nulls.md) - [anyHeavy](../reference/anyheavy.md) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b532e0de8f0..46b1167fa33 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,57 @@ Result: ``` ## makeDate32 -Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day). + +**Syntax** + +```sql +makeDate32(year, [month,] day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +:::note +If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`. +::: + +**Returned values** + +- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md). 
+ +**Examples** + +Create a date from a year, month, and day: + +Query: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +Result: + +```response +2024-01-01 +``` + +Create a date from a year and day of year: + +Query: + +``` sql +SELECT makeDate32(2024, 100); +``` + +Result: + +```response +2024-04-09 +``` ## makeDateTime @@ -125,12 +175,38 @@ ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). +Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components (year, month, day, hour, minute, second), with an optional sub-second component. **Syntax** +```sql +makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision]]) +``` + +**Arguments** + +- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `fraction` — Optional sub-second component, interpreted at the given `precision` (in the example below, `779` with `precision` `5` yields `.00779`). [Integer](../../sql-reference/data-types/int-uint.md). +- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + ``` sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); +``` + +```response +┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐ +│ 2023-05-15 10:30:45.00779 │ +└─────────────────────────────────────────────────┘ ``` ## timestamp
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 4e252785715..8f9fd9abb0d 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -This function is case-insensitive. +Aliases: `fullHostName`, `FQDN`. **Returned value**
diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2cec1987c20..5169d4487ec 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -6,44 +6,122 @@ sidebar_label: Time Window # Time Window Functions -Time window functions return the inclusive lower and exclusive upper bound of the corresponding window.
The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below: ## tumble A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). +**Syntax** + ``` sql tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). **Example** Query: ``` sql -SELECT tumble(now(), toIntervalDay('1')) +SELECT tumble(now(), toIntervalDay('1')); ``` Result: ``` text ┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │ └───────────────────────────────────────────────┘ ```
+## tumbleStart + +Returns the inclusive lower bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleStart(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleStart(now(), toIntervalDay('1'))─┐ +│ 2024-07-04 00:00:00 │ +└────────────────────────────────────────┘ +``` + +## tumbleEnd + +Returns the exclusive upper bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding tumbling window.
[DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleEnd(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleEnd(now(), toIntervalDay('1'))─┐ +│ 2024-07-05 00:00:00 │ +└──────────────────────────────────────┘ +``` + ## hop -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. ``` sql hop(time_attr, hop_interval, window_interval [, timezone]) @@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: **Example** Query: ``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); ``` Result: ``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart - -Returns the inclusive lower bound of the corresponding tumbling window. - -``` sql -tumbleStart(bounds_tuple); -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd - -Returns the exclusive upper bound of the corresponding tumbling window. 
- -``` sql -tumbleEnd(bounds_tuple); -tumbleEnd(time_attr, interval [, timezone]); +┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │ +└────────────────────────────────────────────────────┘ ``` ## hopStart -Returns the inclusive lower bound of the corresponding hopping window. +Returns the inclusive lower bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-03 00:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` ## hopEnd -Returns the exclusive upper bound of the corresponding hopping window. +Returns the exclusive upper bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. 
+::: + +**Example** + +Query: + +``` sql +SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-05 00:00:00 │ +└───────────────────────────────────────────────────────┘ + ``` ## Related content diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ad40725d680..24b356eca87 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -600,7 +600,7 @@ mapApply(func, map) **Arguments** -- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../data-types/map.md). **Returned value** @@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. + +## mapPartialSort + +Sorts the elements of a map in ascending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k2':1,'k3':2,'k1':3} │ +└───────────────────────────────────────────────────────────────────────────┘ +``` ## mapReverseSort(\[func,\], map) @@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; ``` For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). + +## mapPartialReverseSort + +Sorts the elements of a map in descending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialReverseSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). 
+ +**Example** + +``` sql +SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k1':3,'k3':2,'k2':1} │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md new file mode 100644 index 00000000000..e15f8a40156 --- /dev/null +++ b/docs/en/sql-reference/table-functions/fuzzQuery.md @@ -0,0 +1,36 @@ +--- +slug: /en/sql-reference/table-functions/fuzzQuery +sidebar_position: 75 +sidebar_label: fuzzQuery +--- + +# fuzzQuery + +Perturbs the given query string with random variations. + +``` sql +fuzzQuery(query[, max_query_length[, random_seed]]) +``` + +**Arguments** + +- `query` (String) - The source query to perform the fuzzing on. +- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. +- `random_seed` (UInt64) - A random seed for producing stable results. + +**Returned Value** + +A table object with a single column containing perturbed query strings. + +## Usage Example + +``` sql +SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; +``` + +``` + ┌─query──────────────────────────────────────────────────────────┐ +1. │ SELECT 'a' AS key GROUP BY key │ +2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ + └────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 49076f3cbe1..16225d4b0e2 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `GROUPS` frame | ❌ | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`| | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | ntile(buckets) | ✅
Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
diff --git a/programs/client/Client.h b/programs/client/Client.h index 229608f787d..6d57a6ea648 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,7 +9,10 @@ namespace DB class Client : public ClientBase { public: - Client() = default; + Client() + { + fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); + } void initialize(Poco::Util::Application & self) override;
diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index f0949fcfceb..7e8afe084fb 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,6 +1,8 @@ set (CLICKHOUSE_DISKS_SOURCES DisksApp.cpp + DisksClient.cpp ICommand.cpp + CommandChangeDirectory.cpp CommandCopy.cpp CommandLink.cpp CommandList.cpp @@ -9,10 +11,14 @@ set (CLICKHOUSE_DISKS_SOURCES CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp) + CommandSwitchDisk.cpp + CommandWrite.cpp + CommandHelp.cpp + CommandTouch.cpp + CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) - set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) + set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) endif () set (CLICKHOUSE_DISKS_LINK
diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp new file mode 100644 index 00000000000..b545f37de72 --- /dev/null +++ b/programs/disks/CommandChangeDirectory.cpp @@ -0,0 +1,35 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandChangeDirectory final : public ICommand +{ +public: + explicit CommandChangeDirectory() : ICommand() + { + command_name = "cd"; + description = "Change directory (makes sense only in interactive mode)"; + options_description.add_options()("path", po::value<String>(), "the path to which we want to change (mandatory, positional)")( + "disk", po::value<String>(), "A disk where the path is changed (without disk switching)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + DiskWithPath & disk = getDiskWithPath(client, options, "disk"); + String path = getValueFromCommandLineOptionsThrow<String>(options, "path"); + disk.setPath(path); + } +}; + +CommandPtr makeCommandChangeDirectory() +{ + return std::make_shared<CommandChangeDirectory>(); +} + +} diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..e3051f2702c 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,6 +1,8 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { @@ -10,59 +12,89 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; } + class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... <FROM_PATH> <TO_PATH>
"; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive,r", "recursively copy the directory (required to remove a directory)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) + if (!disk_from.getDisk()->exists(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk '{}': No such file or directory", + path_from, + disk_from.getDisk()->getName()); } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* 
read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if (disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; + +CommandPtr makeCommandCopy() +{ + return std::make_shared(); } -std::unique_ptr makeCommandCopy() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandGetCurrentDiskAndPath.cpp b/programs/disks/CommandGetCurrentDiskAndPath.cpp new file mode 100644 index 00000000000..15f8ef5aae8 --- /dev/null +++ b/programs/disks/CommandGetCurrentDiskAndPath.cpp @@ -0,0 +1,30 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandGetCurrentDiskAndPath final : public ICommand +{ +public: + explicit CommandGetCurrentDiskAndPath() : ICommand() + { + command_name = "current_disk_with_path"; + description = "Prints current disk and path (which coincide with the prompt)"; + } + + void executeImpl(const CommandLineOptions &, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl; + } +}; + +CommandPtr makeCommandGetCurrentDiskAndPath() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp new file mode 100644 index 00000000000..a3aee9498d3 --- /dev/null +++ b/programs/disks/CommandHelp.cpp @@ -0,0 +1,43 @@ +#include "DisksApp.h" +#include "ICommand.h" + +#include +#include + +namespace DB +{ + +class CommandHelp final : public ICommand +{ +public: + explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_) + { + command_name = "help"; + description = "Print help message about available commands"; + options_description.add_options()( + "command", po::value(), "A command to help with (optional, positional), if not specified, help lists all the commands"); + positional_options_description.add("command", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & /*client*/) override + { + std::optional command = getValueFromCommandLineOptionsWithOptional(options, "command"); + if (command.has_value()) + { + disks_app.printCommandHelpMessage(command.value()); + } + else + { + disks_app.printAvailableCommandsHelpMessage(); + } + } + + const DisksApp & disks_app; +}; + +CommandPtr makeCommandHelp(const DisksApp & disks_app) +{ + return std::make_shared(disks_app); +} + +} diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index dbaa3162f82..11c196cafc5 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,14 +1,9 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandLink final : public ICommand { public: @@ -16,42 +11,27 @@ public: { command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; - usage = "link [OPTION]... 
"; + options_description.add_options()( + "path-from", po::value(), "the path from which a hard link will be created (mandatory, positional)")( + "path-to", po::value(), "the path where a hard link will be created (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + const String & path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk->createHardLink(relative_path_from, relative_path_to); + disk.getDisk()->createHardLink(path_from, path_to); } }; + +CommandPtr makeCommandLink() +{ + return std::make_shared(); } -std::unique_ptr makeCommandLink() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 7213802ea86..77479b1d217 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,98 +1,95 @@ -#include "ICommand.h" #include #include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandList final : public ICommand { public: - CommandList() + explicit CommandList() : ICommand() { command_name = "list"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "List files at path[s]"; - usage = "list [OPTION]... 
..."; - command_option_description->add_options() - ("recursive", "recursively list all directories"); + options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - bool recursive = config.getBool("recursive", false); + bool recursive = options.count("recursive"); + bool show_hidden = options.count("all"); + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", "."); if (recursive) - listRecursive(disk, relative_path); + listRecursive(disk, path, show_hidden); else - list(disk, relative_path); + list(disk, path, show_hidden); } private: - static void list(const DiskPtr & disk, const std::string & relative_path) + static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(path); + std::vector selected_and_sorted_file_names{}; for (const auto & file_name : file_names) - std::cout << file_name << '\n'; + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } } - static void listRecursive(const DiskPtr & disk, const std::string & relative_path) + static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(relative_path); + std::vector selected_and_sorted_file_names{}; std::cout << relative_path << ":\n"; - if (!file_names.empty()) - { - for (const auto & file_name : file_names) - std::cout << file_name << '\n'; - std::cout << "\n"; - } - for (const auto & file_name : file_names) + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) { - auto path = relative_path.empty() ? 
file_name : (relative_path + "/" + file_name); - if (disk->isDirectory(path)) - listRecursive(disk, path); + std::cout << file_name << "\n"; + } + std::cout << "\n"; + + for (const auto & file_name : selected_and_sorted_file_names) + { + auto path = [&]() -> String + { + if (relative_path.ends_with("/")) + { + return relative_path + file_name; + } + else + { + return relative_path + "/" + file_name; + } + }(); + if (disk.isDirectory(path)) + { + listRecursive(disk, path, show_hidden); + } } } }; -} -std::unique_ptr makeCommandList() +CommandPtr makeCommandList() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 79da021fd00..9fb67fed5e0 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,68 +1,40 @@ -#include "ICommand.h" +#include #include +#include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandListDisks final : public ICommand { public: - CommandListDisks() + explicit CommandListDisks() : ICommand() { command_name = "list-disks"; - description = "List disks names"; - usage = "list-disks [OPTION]"; + description = "Lists all available disks"; } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr &, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions &, DisksClient & client) override { - if (!command_arguments.empty()) + std::vector sorted_and_selected{}; + for (const auto & disk_name : client.getAllDiskNames()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath("")); } - - constexpr auto config_prefix = "storage_configuration.disks"; - constexpr auto default_disk_name = "default"; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - bool has_default_disk = false; - - /// For the output to be ordered - std::set disks; - - for (const auto & disk_name : keys) + std::sort(sorted_and_selected.begin(), sorted_and_selected.end()); + for (const auto & disk_name : sorted_and_selected) { - if (disk_name == default_disk_name) - has_default_disk = true; - disks.insert(disk_name); + std::cout << disk_name << "\n"; } - - if (!has_default_disk) - disks.insert(default_disk_name); - - for (const auto & disk : disks) - std::cout << disk << '\n'; } -}; -} -std::unique_ptr makeCommandListDisks() +private: +}; + +CommandPtr makeCommandListDisks() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 6d33bdec498..c6222f326d4 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -6,61 +6,35 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandMkDir final : public ICommand { public: CommandMkDir() { command_name = "mkdir"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Create a directory"; - usage = "mkdir [OPTION]... 
"; - command_option_description->add_options() - ("recursive", "recursively create directories"); + description = "Creates a directory"; + options_description.add_options()("parents", "recursively create directories")( + "path", po::value(), "the path on which directory should be created (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } + bool recursive = options.count("parents"); + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - bool recursive = config.getBool("recursive", false); + String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); if (recursive) - disk->createDirectories(relative_path); + disk.getDisk()->createDirectories(path); else - disk->createDirectory(relative_path); + disk.getDisk()->createDirectory(path); } }; + +CommandPtr makeCommandMkDir() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMkDir() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..e3d485032e0 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,5 +1,5 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandMove final : public ICommand { public: @@ -16,44 +17,62 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... 
"; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) + auto disk = client.getCurrentDiskWithPath(); + + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + if (disk.getDisk()->isFile(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + disk.getDisk()->moveFile(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk: '{}': No such file or directory", + path_from, + disk.getDisk()->getName()); } - - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if (disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); - else - disk->moveDirectory(relative_path_from, relative_path_to); } }; + +CommandPtr makeCommandMove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 0f3ac7ab98c..277e735f507 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,78 +1,52 @@ -#include "ICommand.h" -#include #include #include #include +#include #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandRead final : public ICommand { public: CommandRead() { command_name = "read"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Read a file from `FROM_PATH` to `TO_PATH`"; - usage = "read [OPTION]... 
[]"; - command_option_description->add_options() - ("output", po::value(), "file to which we are reading, defaults to `stdout`"); + description = "Read a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading (mandatory, positional)")( + "path-to", po::value(), "file to which we are writing, defaults to `stdout`"); + positional_options_description.add("path-from", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("output")) - config.setString("output", options["output"].as()); - } + auto disk = client.getCurrentDiskWithPath(); + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + auto in = disk.getDisk()->readFile(path_from); + std::unique_ptr out = {}; + if (path_to.has_value()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(command_arguments[0]); - - String path_output = config.getString("output", ""); - - if (!path_output.empty()) - { - String relative_path_output = validatePathAndGetAsRelative(path_output); - - auto in = disk->readFile(relative_path); - auto out = disk->writeFile(relative_path_output); + String relative_path_to = disk.getRelativeFromRoot(path_to.value()); + out = disk.getDisk()->writeFile(relative_path_to); copyData(*in, *out); - out->finalize(); } else { - auto in = disk->readFile(relative_path); - std::unique_ptr out = std::make_unique(STDOUT_FILENO); + out = std::make_unique(STDOUT_FILENO); copyData(*in, *out); + out->write('\n'); } + out->finalize(); } }; + +CommandPtr makeCommandRead() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRead() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 0c631eacff3..4388f6c0b14 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,5 +1,6 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" +#include "ICommand.h" namespace DB { @@ -9,46 +10,49 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandRemove final : public ICommand { public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'"; - usage = "remove [OPTION]... "; + description = "Remove file or directory. 
Throws exception if file doesn't exist"; + options_description.add_options()("path", po::value(), "path that is going to be deleted (mandatory, positional)")( + "recursive,r", "recursively removes the directory (required to remove a directory)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 1) + auto disk = client.getCurrentDiskWithPath(); + const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); + bool recursive = options.count("recursive"); + if (!disk.getDisk()->exists(path)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName()); + } + else if (disk.getDisk()->isDirectory(path)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path); + } + else + { + disk.getDisk()->removeRecursive(path); + } + } + else + { + disk.getDisk()->removeFileIfExists(path); } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - disk->removeRecursive(relative_path); } }; + +CommandPtr makeCommandRemove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRemove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp new file mode 100644 index 00000000000..fa02d991365 --- /dev/null +++ b/programs/disks/CommandSwitchDisk.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "DisksApp.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandSwitchDisk final : public ICommand +{ +public: + explicit CommandSwitchDisk() : ICommand() + { + command_name = "switch-disk"; + description = "Switch disk (makes sense only in interactive mode)"; + options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( + "path", po::value(), "the path to switch to on the disk (optional)"); + positional_options_description.add("disk", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + String disk = getValueFromCommandLineOptions(options, "disk"); + std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); + + client.switchToDisk(disk, path); + } +}; + +CommandPtr makeCommandSwitchDisk() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandTouch.cpp b/programs/disks/CommandTouch.cpp new file mode 100644 index 00000000000..c0bdb64cf9e --- /dev/null +++ b/programs/disks/CommandTouch.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandTouch final : public ICommand +{ +public: + explicit CommandTouch() : ICommand() + { + command_name = "touch"; + description = "Create a file by path"; + options_description.add_options()("path", po::value(), "the path of the file to be created (mandatory, positional)"); + positional_options_description.add("path", 1); + } 
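+ /// Resolves `path` against the current disk's current directory and creates an empty file at the resulting location.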
+ + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + + disk.getDisk()->createFile(disk.getRelativeFromRoot(path)); + } +}; + +CommandPtr makeCommandTouch() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 7ded37e067a..9c82132e284 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,79 +1,57 @@ -#include "ICommand.h" #include +#include "ICommand.h" -#include #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandWrite final : public ICommand { public: CommandWrite() { command_name = "write"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Write a file from `FROM_PATH` to `TO_PATH`"; - usage = "write [OPTION]... [] "; - command_option_description->add_options() - ("input", po::value(), "file from which we are reading, defaults to `stdin`"); + description = "Write a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")( + "path-to", po::value(), "file to which we are writing (mandatory, positional)"); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("input")) - config.setString("input", options["input"].as()); - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); + + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + auto in = [&]() -> std::unique_ptr { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + if (!path_from.has_value()) + { + return std::make_unique(STDIN_FILENO); + } + else + { + String relative_path_from = disk.getRelativeFromRoot(path_from.value()); + return disk.getDisk()->readFile(relative_path_from); + } + }(); - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - String path_input = config.getString("input", ""); - std::unique_ptr in; - if (path_input.empty()) - { - in = std::make_unique(STDIN_FILENO); - } - else - { - String relative_path_input = validatePathAndGetAsRelative(path_input); - in = disk->readFile(relative_path_input); - } - - auto out = disk->writeFile(relative_path); + auto out = disk.getDisk()->writeFile(path_to); copyData(*in, *out); out->finalize(); } }; + +CommandPtr makeCommandWrite() +{ + return std::make_shared(); } -std::unique_ptr makeCommandWrite() -{ - return std::make_unique(); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 5da5ab4bae9..59ba45b9451 100644 --- a/programs/disks/DisksApp.cpp +++ 
b/programs/disks/DisksApp.cpp @@ -1,11 +1,22 @@ #include "DisksApp.h" +#include +#include +#include "Common/Exception.h" +#include "Common/filesystemHelpers.h" +#include +#include "DisksClient.h" #include "ICommand.h" +#include "ICommand_fwd.h" + +#include +#include +#include +#include #include -#include #include - +#include namespace DB { @@ -13,74 +24,289 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +LineReader::Patterns DisksApp::query_extenders = {"\\"}; +LineReader::Patterns DisksApp::query_delimiters = {""}; +String DisksApp::word_break_characters = " \t\v\f\a\b\r\n"; + +CommandPtr DisksApp::getCommandByName(const String & command) const +{ + try + { + if (auto it = aliases.find(command); it != aliases.end()) + return command_descriptions.at(it->second); + + return command_descriptions.at(command); + } + catch (std::out_of_range &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); + } } -size_t DisksApp::findCommandPos(std::vector & common_arguments) +std::vector DisksApp::getEmptyCompletion(String command_name) const { - for (size_t i = 0; i < common_arguments.size(); i++) - if (supported_commands.contains(common_arguments[i])) - return i + 1; - return common_arguments.size(); + auto command_ptr = command_descriptions.at(command_name); + std::vector answer{}; + if (multidisk_commands.contains(command_ptr->command_name)) + { + answer = client->getAllFilesByPatternFromAllDisks(""); + } + else + { + answer = client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } + for (const auto & disk_name : client->getAllDiskNames()) + { + answer.push_back(disk_name); + } + for (const auto & option : command_ptr->options_description.options()) + { + answer.push_back("--" + option->long_name()); + } + if (command_name == "help") + { + for (const auto & [current_command_name, description] : command_descriptions) + { + answer.push_back(current_command_name); + } + } + std::sort(answer.begin(), answer.end()); + return answer; } -void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description) +std::vector DisksApp::getCommandsToComplete(const String & command_prefix) const { - std::optional help_description = - createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); - - help_description->add(command_option_description); - - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; - - for (const auto & current_command : supported_commands) - std::cout << command_descriptions[current_command]->command_name - << "\t" - << command_descriptions[current_command]->description - << "\n\n"; - - std::cout << command_option_description << '\n'; + std::vector answer{}; + for (const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + return {command_prefix}; } -String DisksApp::getDefaultConfigFileName() +std::vector DisksApp::getCompletions(const String & prefix) const { - return "/etc/clickhouse-server/config.xml"; + auto arguments = po::split_unix(prefix, word_break_characters); + if 
(arguments.empty()) + { + return {}; + } + if (word_break_characters.contains(prefix.back())) + { + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {arguments.back()}; + } + return getEmptyCompletion(command->command_name); + } + else if (arguments.size() == 1) + { + String command_prefix = arguments[0]; + return getCommandsToComplete(command_prefix); + } + else + { + String last_token = arguments.back(); + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {last_token}; + } + std::vector answer = {}; + if (command->command_name == "help") + { + return getCommandsToComplete(last_token); + } + else + { + answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); + + for (const auto & disk_name : client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + else + { + return {last_token}; + } + } } -void DisksApp::addOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description -) +bool DisksApp::processQueryText(const String & text) { - options_description_.add_options() - ("help,h", "Print common help message") - ("config-file,C", po::value(), "Set config file") - ("disk", po::value(), "Set disk name") - ("command_name", po::value(), "Name for command to do") - ("save-logs", "Save logs to a file") - ("log-level", po::value(), "Logging level") - ; + if (text.find_first_not_of(word_break_characters) == std::string::npos) + { + return true; + } + if (exit_strings.find(text) != exit_strings.end()) + return false; + CommandPtr command; + try + { + auto arguments = po::split_unix(text, word_break_characters); + command = getCommandByName(arguments[0]); + arguments.erase(arguments.begin()); + command->execute(arguments, *client); + } + catch (DB::Exception & err) + { + int code = getCurrentExceptionCode(); + if (code == ErrorCodes::LOGICAL_ERROR) + { + throw std::move(err); + } + else if (code == ErrorCodes::BAD_ARGUMENTS) + { + std::cerr << err.message() << "\n" + << "\n"; + if (command.get()) + { + std::cerr << "COMMAND: " << command->command_name << "\n"; + std::cerr << command->options_description << "\n"; + } + else + { + printAvailableCommandsHelpMessage(); + } + } + else + { + std::cerr << err.message() << "\n"; + } + } + catch (std::exception & err) + { + std::cerr << err.what() << "\n"; + } - positional_options_description.add("command_name", 1); + return true; +} - supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"}; -#ifdef CLICKHOUSE_CLOUD - supported_commands.insert("packed-io"); -#endif +void DisksApp::runInteractiveReplxx() +{ + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters.c_str(), + /* highlighter_= */ {}); + lr.enableBracketedPaste(); + + while (true) + { + DiskWithPath disk_with_path = 
client->getCurrentDiskWithPath(); + String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath() + + "\x1b[0m$ "; + + auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m"); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +void DisksApp::parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options) +{ + auto parser = po::command_line_parser(arguments).options(options_description).allow_unregistered(); + po::parsed_options parsed = parser.run(); + po::store(parsed, options); +} + +void DisksApp::addOptions() +{ + options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( + "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a non-interactive mode")( + "test-mode", "Interactive interface in test regime"); command_descriptions.emplace("list-disks", makeCommandListDisks()); + command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("list", makeCommandList()); + command_descriptions.emplace("cd", makeCommandChangeDirectory()); command_descriptions.emplace("move", makeCommandMove()); command_descriptions.emplace("remove", makeCommandRemove()); command_descriptions.emplace("link", makeCommandLink()); - command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("write", makeCommandWrite()); command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); + command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); + command_descriptions.emplace("touch", makeCommandTouch()); + command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif + for (const auto & [command_name, command_ptr] : command_descriptions) + { + if (command_name != command_ptr->command_name) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name"); + } + } } void DisksApp::processOptions() @@ -93,76 +319,122 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("test-mode")) + config().setBool("test-mode", true); + if (options.count("query")) + query = std::optional{options["query"].as()}; } -DisksApp::~DisksApp() + +void DisksApp::printEntryHelpMessage() const { - if (global_context) - global_context->shutdown(); + std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n"; + std::cout << options_description << '\n'; } -void DisksApp::init(std::vector & common_arguments) + +void DisksApp::printAvailableCommandsHelpMessage() const { - stopOptionsProcessing(); + std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descriptions{}; + size_t maximal_command_length = 0; + for (const auto & [command_name, command_ptr] : command_descriptions) + { + std::string command_string = getCommandLineWithAliases(command_ptr); + maximal_command_length = std::max(maximal_command_length, command_string.size()); + 
commands_with_aliases_and_descriptions.push_back({std::move(command_string), command_descriptions.at(command_name)}); + } + for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descriptions) + { + std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description + << "\x1b[0m \n"; + std::cout << command_ptr->options_description; + std::cout << std::endl; + } +} - ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())}; +void DisksApp::printCommandHelpMessage(CommandPtr command) const +{ + String command_name_with_aliases = getCommandLineWithAliases(command); + std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n"; + std::cout << command->options_description; +} - po::positional_options_description positional_options_description; +void DisksApp::printCommandHelpMessage(String command_name) const +{ + printCommandHelpMessage(getCommandByName(command_name)); +} - addOptions(options_description, positional_options_description); +String DisksApp::getCommandLineWithAliases(CommandPtr command) const +{ + String command_string = command->command_name; + bool need_comma = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == command->command_name) + { + if (std::exchange(need_comma, true)) + command_string += ","; + else + command_string += "("; + command_string += alias_name; + } + } + command_string += (need_comma ? ")" : ""); + return command_string; +} - size_t command_pos = findCommandPos(common_arguments); - std::vector global_flags(command_pos); - command_arguments.resize(common_arguments.size() - command_pos); - copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin()); - copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin()); +void DisksApp::initializeHistoryFile() +{ + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.disks-file-history"; - parseAndCheckOptions(options_description, positional_options_description, global_flags); + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } +} + +void DisksApp::init(const std::vector & common_arguments) +{ + addOptions(); + parseAndCheckOptions(common_arguments, options_description, options); po::notify(options); if (options.count("help")) { - printHelpMessage(options_description); + printEntryHelpMessage(); + printAvailableCommandsHelpMessage(); exit(0); // NOLINT(concurrency-mt-unsafe) } - if (!supported_commands.contains(command_name)) - { - std::cerr << "Unknown command name: " << command_name << "\n"; - printHelpMessage(options_description); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - processOptions(); } -void DisksApp::parseAndCheckOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments) +String DisksApp::getDefaultConfigFileName() { - auto parser = po::command_line_parser(arguments) - 
.options(options_description_) - .positional(positional_options_description) - .allow_unregistered(); - - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - - auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - for (const auto & arg : positional_arguments) - { - if (command_descriptions.contains(arg)) - { - command_name = arg; - break; - } - } + return "/etc/clickhouse-server/config.xml"; } int DisksApp::main(const std::vector & /*args*/) { + std::vector keys; + config().keys(keys); if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -173,9 +445,13 @@ int DisksApp::main(const std::vector & /*args*/) } else { + printEntryHelpMessage(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } + config().keys(keys); + initializeHistoryFile(); + if (config().has("save-logs")) { auto log_level = config().getString("log-level", "trace"); @@ -200,61 +476,68 @@ int DisksApp::main(const std::vector & /*args*/) global_context->setApplicationType(Context::ApplicationType::DISKS); String path = config().getString("path", DBMS_DEFAULT_PATH); + global_context->setPath(path); - auto & command = command_descriptions[command_name]; + String main_disk = config().getString("disk", "default"); - auto command_options = command->getCommandOptions(); - std::vector args; - if (command_options) + auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + std::vector>> disks_with_path; + + for (const auto & [_, disk_ptr] : disk_selector->getDisksMap()) { - auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered(); - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - po::notify(options); + disks_with_path.emplace_back( + disk_ptr, (disk_ptr->getName() == "local") ? 
std::optional{fs::current_path().string()} : std::nullopt); + } - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - command->processOptions(config(), options); + + client = std::make_unique(std::move(disks_with_path), main_disk); + + suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + if (!query.has_value()) + { + runInteractive(); } else { - auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered(); - po::parsed_options parsed = parser.run(); - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + processQueryText(query.value()); } - std::unordered_set disks - { - config().getString("disk", "default"), - config().getString("disk-from", config().getString("disk", "default")), - config().getString("disk-to", config().getString("disk", "default")), - }; - - auto validator = [&disks]( - const Poco::Util::AbstractConfiguration & config, - const std::string & disk_config_prefix, - const std::string & disk_name) - { - if (!disks.contains(disk_name)) - return false; - - const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); - - if (disk_type == "cache") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); - - return true; - }; - - constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(); - disk_selector->initialize(config(), config_prefix, global_context, validator); - - command->execute(args, disk_selector, config()); - return Application::EXIT_OK; } +DisksApp::~DisksApp() +{ + client.reset(nullptr); + if (global_context) + global_context->shutdown(); +} + +void DisksApp::runInteractiveTestMode() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void DisksApp::runInteractive() +{ + if (config().hasOption("test-mode")) + runInteractiveTestMode(); + else + runInteractiveReplxx(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) @@ -269,16 +552,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv) catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; - return 1; + return 0; } catch (const boost::program_options::error & e) { std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + return 0; } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + return 0; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 51bc3f58dc4..f8167884c62 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -1,61 +1,107 @@ #pragma once +#include +#include +#include #include +#include "DisksClient.h" +#include "ICommand_fwd.h" #include +#include +#include #include -#include - namespace DB { -class ICommand; -using CommandPtr = std::unique_ptr; - -namespace po = boost::program_options; using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; -class DisksApp : public Poco::Util::Application, public Loggers +class DisksApp : public Poco::Util::Application { public: - DisksApp() = default; - ~DisksApp() override; + void addOptions(); - void init(std::vector & common_arguments); - - int main(const std::vector & args) override; - -protected: - static String getDefaultConfigFileName(); - - void addOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description); void processOptions(); - void printHelpMessage(ProgramOptionsDescription & command_option_description); + bool processQueryText(const String & text); - size_t findCommandPos(std::vector & common_arguments); + void init(const std::vector & common_arguments); + + int main(const std::vector & /*args*/) override; + + CommandPtr getCommandByName(const String & command) const; + + void initializeHistoryFile(); + + static void parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); + + void printEntryHelpMessage() const; + void printAvailableCommandsHelpMessage() const; + void printCommandHelpMessage(String command_name) const; + void printCommandHelpMessage(CommandPtr command) const; + String getCommandLineWithAliases(CommandPtr command) const; + + + std::vector getCompletions(const String & prefix) const; + + std::vector getEmptyCompletion(String command_name) const; + + ~DisksApp() override; private: - void parseAndCheckOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments); + void runInteractive(); + void runInteractiveReplxx(); + void runInteractiveTestMode(); + + String getDefaultConfigFileName(); + + std::vector getCommandsToComplete(const String & command_prefix) const; + + // Fields responsible for the REPL work + String history_file; + LineReader::Suggest suggest; + static LineReader::Patterns query_extenders; + static LineReader::Patterns query_delimiters; + static String word_break_characters; + + // General command line arguments parsing fields -protected: - ContextMutablePtr global_context; SharedContextHolder shared_context; - - String command_name; - std::vector command_arguments; - - std::unordered_set supported_commands; + ContextMutablePtr global_context; + ProgramOptionsDescription options_description; + CommandLineOptions options; std::unordered_map command_descriptions; - po::variables_map options; -}; + std::optional query; + const std::unordered_map aliases + = {{"cp", "copy"}, + {"mv", "move"}, + {"ls", "list"}, + {"list_disks", "list-disks"}, + {"ln", "link"}, + {"rm", "remove"}, + {"cat", "read"}, + {"r", "read"}, + {"w", "write"}, + {"create", "touch"}, + {"delete", "remove"}, + 
{"ls-disks", "list-disks"}, + {"ls_disks", "list-disks"}, + {"packed_io", "packed-io"}, + {"change-dir", "cd"}, + {"change_dir", "cd"}, + {"switch_disk", "switch-disk"}, + {"current", "current_disk_with_path"}, + {"current_disk", "current_disk_with_path"}, + {"current_path", "current_disk_with_path"}, + {"cur", "current_disk_with_path"}}; + + std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; + + std::unique_ptr client{}; +}; } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp new file mode 100644 index 00000000000..7e36c7911ab --- /dev/null +++ b/programs/disks/DisksClient.cpp @@ -0,0 +1,263 @@ +#include "DisksClient.h" +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +namespace DB +{ +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) +{ + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + path = path_.value(); + } + else + { + path = String{"/"}; + } + + String relative_path = normalizePathAndGetAsRelative(path); + if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/")))) + { + return; + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} is not a directory", + path, + relative_path, + disk->getName()); +} + +std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const +{ + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } +} + +std::vector DiskWithPath::getAllFilesByPattern(const String & pattern) const +{ + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; + } +}; + +void DiskWithPath::setPath(const String & any_path) +{ + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } +} + +String DiskWithPath::validatePathAndGetAsRelative(const String & path) +{ + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. 
+ if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; +} + +String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + String npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); +} + +String DiskWithPath::normalizePath(const String & path) +{ + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); +} + +DisksClient::DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) +{ + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = false; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); +} + +const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +DiskWithPath & DisksClient::getDiskWithPath(const String & disk) +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +const DiskWithPath & DisksClient::getCurrentDiskWithPath() const +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +DiskWithPath & DisksClient::getCurrentDiskWithPath() +{ + try + { + return disks.at(current_disk); + } + catch (...) 
+ { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +void DisksClient::switchToDisk(const String & disk_, const std::optional & path_) +{ + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } +} + +std::vector DisksClient::getAllDiskNames() const +{ + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; +} + +std::vector DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const +{ + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; +} + +void DisksClient::addDisk(DiskPtr disk_, const std::optional & path_) +{ + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); +} +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h new file mode 100644 index 00000000000..8a55d22af93 --- /dev/null +++ b/programs/disks/DisksClient.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Disks/IDisk.h" + +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +std::vector split(const String & text, const String & delimiters); + +using ProgramOptionsDescription = boost::program_options::options_description; +using CommandLineOptions = boost::program_options::variables_map; + +class DiskWithPath +{ +public: + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt); + + String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } + + String getCurrentPath() const { return path; } + + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/"))); + } + + std::vector listAllFilesByPath(const String & any_path) const; + + std::vector getAllFilesByPattern(const String & pattern) const; + + DiskPtr getDisk() const { return disk; } + + void setPath(const String & any_path); + + String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } + +private: + static String validatePathAndGetAsRelative(const String & path); + static std::string normalizePathAndGetAsRelative(const std::string & messyPath); + static std::string normalizePath(const std::string & messyPath); + + const DiskPtr disk; + String path; +}; + +class DisksClient +{ +public: + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk); + + const DiskWithPath & getDiskWithPath(const String & disk) const; + + DiskWithPath & getDiskWithPath(const String & disk); + + const DiskWithPath & getCurrentDiskWithPath() const; + + DiskWithPath & getCurrentDiskWithPath(); + + DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } + + DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } + + void switchToDisk(const String & disk_, const std::optional & path_); + + std::vector getAllDiskNames() const; + 
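+ /// Collects completions matching `pattern` from every registered disk; DisksApp uses this to autocomplete arguments of multidisk commands such as `copy`.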
+ std::vector getAllFilesByPatternFromAllDisks(const String & pattern) const; + + +private: + void addDisk(DiskPtr disk_, const std::optional & path_); + + String current_disk; + std::unordered_map disks; +}; +} diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 86188fb6db1..f622bcad3c6 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,5 @@ #include "ICommand.h" -#include +#include "DisksClient.h" namespace DB @@ -10,43 +10,42 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void ICommand::printHelpMessage() const +CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) { - std::cout << "Command: " << command_name << '\n'; - std::cout << "Description: " << description << '\n'; - std::cout << "Usage: " << usage << '\n'; + CommandLineOptions options; + auto parser = po::command_line_parser(commands); + parser.options(options_description).positional(positional_options_description); - if (command_option_description) + po::parsed_options parsed = parser.run(); + po::store(parsed, options); + + return options; +} + +void ICommand::execute(const Strings & commands, DisksClient & client) +{ + try { - auto options = *command_option_description; - if (!options.options().empty()) - std::cout << options << '\n'; + processCommandLineArguments(commands); + } + catch (std::exception & exc) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); + } + executeImpl(processCommandLineArguments(commands), client); +} + +DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) +{ + auto disk_name = getValueFromCommandLineOptionsWithOptional(options, name); + if (disk_name.has_value()) + { + return client.getDiskWithPath(disk_name.value()); + } + else + { + return client.getCurrentDiskWithPath(); } } -void ICommand::addOptions(ProgramOptionsDescription & options_description) -{ - if (!command_option_description || command_option_description->options().empty()) - return; - - options_description.add(*command_option_description); -} - -String ICommand::validatePathAndGetAsRelative(const String & path) -{ - /// If path contain non-normalized symbols like . we will normalized them. If the resulting normalized path - /// still contain '..' it can be dangerous, disallow such paths. Also since clickhouse-disks - /// is not an interactive program (don't track you current path) it's OK to disallow .. paths. - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. 
- if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; -} - } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index efe350fe87b..6faf90e2b52 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,66 +1,146 @@ #pragma once -#include +#include #include +#include +#include #include -#include #include +#include "Common/Exception.h" +#include -#include +#include + +#include "DisksApp.h" + +#include "DisksClient.h" + +#include "ICommand_fwd.h" namespace DB { namespace po = boost::program_options; -using ProgramOptionsDescription = boost::program_options::options_description; -using CommandLineOptions = boost::program_options::variables_map; +using ProgramOptionsDescription = po::options_description; +using PositionalProgramOptionsDescription = po::positional_options_description; +using CommandLineOptions = po::variables_map; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} class ICommand { public: - ICommand() = default; + explicit ICommand() = default; virtual ~ICommand() = default; - virtual void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) = 0; + void execute(const Strings & commands, DisksClient & client); - const std::optional & getCommandOptions() const { return command_option_description; } + virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0; - void addOptions(ProgramOptionsDescription & options_description); - - virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0; + CommandLineOptions processCommandLineArguments(const Strings & commands); protected: - void printHelpMessage() const; + template + static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) + { + try + { + return options[name].as(); + } + catch (boost::bad_any_cast &) + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); + } + } + + template + static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name); + } + } + + template + static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + return default_value; + } + } + + template + static std::optional getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return std::optional{getValueFromCommandLineOptions(options, name)}; + } + else + { + return std::nullopt; + } + } + + DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); + + String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to) + { + if (!disk_to.getDisk()->isDirectory(path_to)) + { + return path_to; + } + String copied_path_from = path_from; + if (copied_path_from.ends_with('/')) + { + copied_path_from.pop_back(); + } + String plain_filename = fs::path(copied_path_from).filename(); + + return fs::path{path_to} / plain_filename; + 
} - static String validatePathAndGetAsRelative(const String & path); public: String command_name; String description; + ProgramOptionsDescription options_description; protected: - std::optional command_option_description; - String usage; - po::positional_options_description positional_options_description; + PositionalProgramOptionsDescription positional_options_description; }; -using CommandPtr = std::unique_ptr; - -} - DB::CommandPtr makeCommandCopy(); -DB::CommandPtr makeCommandLink(); -DB::CommandPtr makeCommandList(); DB::CommandPtr makeCommandListDisks(); +DB::CommandPtr makeCommandList(); +DB::CommandPtr makeCommandChangeDirectory(); +DB::CommandPtr makeCommandLink(); DB::CommandPtr makeCommandMove(); DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); +DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandGetCurrentDiskAndPath(); +DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); +DB::CommandPtr makeCommandTouch(); +#ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); +#endif +} diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h new file mode 100644 index 00000000000..84310b4a18d --- /dev/null +++ b/programs/disks/ICommand_fwd.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ +class ICommand; + +using CommandPtr = std::shared_ptr; +} diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 60834dbe582..44c2daa33ad 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include #include @@ -262,11 +264,43 @@ HTTPContextPtr httpContext() return std::make_shared(Context::getGlobalContextInstance()); } +String getKeeperPath(Poco::Util::LayeredConfiguration & config) +{ + String path; + if (config.has("keeper_server.storage_path")) + { + path = config.getString("keeper_server.storage_path"); + } + else if (config.has("keeper_server.log_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path(); + } + else if (config.has("keeper_server.snapshot_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. 
Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } + else + { + path = KEEPER_DEFAULT_PATH; + } + return path; +} + + } int Keeper::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif Poco::Logger * log = &logger(); UseSSL use_ssl; @@ -311,31 +345,7 @@ try updateMemorySoftLimitInConfig(config()); - std::string path; - - if (config().has("keeper_server.storage_path")) - { - path = config().getString("keeper_server.storage_path"); - } - else if (config().has("keeper_server.log_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); - } - else if (config().has("keeper_server.snapshot_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); - } - else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", - KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); - } - else - { - path = KEEPER_DEFAULT_PATH; - } - + std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. @@ -549,7 +559,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", KEEPER_DEFAULT_PATH), + getKeeperPath(config()), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) diff --git a/programs/keeper/conf.d/local.yaml b/programs/keeper/conf.d/local.yaml new file mode 100644 index 00000000000..722e90e374a --- /dev/null +++ b/programs/keeper/conf.d/local.yaml @@ -0,0 +1,9 @@ +logger: + log: + "@remove": remove + errorlog: + "@remove": remove + console: 1 +keeper_server: + log_storage_path: ./logs + snapshot_storage_path: ./snapshots diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f992fdc13a9..3b88bb36954 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -586,6 +587,54 @@ static void sanityChecks(Server & server) } } +void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) +{ + try + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys("startup_scripts", keys); + + SetResultDetailsFunc callback; + for (const auto & key : keys) + { + std::string full_prefix = "startup_scripts." 
+ key; + + if (config.has(full_prefix + ".condition")) + { + auto condition = config.getString(full_prefix + ".condition"); + auto condition_read_buffer = ReadBufferFromString(condition); + auto condition_write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Checking startup query condition `{}`", condition); + executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + + auto result = condition_write_buffer.str(); + + if (result != "1\n" && result != "true\n") + { + if (result != "0\n" && result != "false\n") + context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result)); + + continue; + } + + LOG_DEBUG(log, "Condition is true, will execute the query next"); + } + + auto query = config.getString(full_prefix + ".query"); + auto read_buffer = ReadBufferFromString(query); + auto write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Executing query `{}`", query); + executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + } + } + catch (...) + { + tryLogCurrentException(log, "Failed to parse startup scripts file"); + } +} + static void initializeAzureSDKLogger( [[ maybe_unused ]] const ServerSettings & server_settings, [[ maybe_unused ]] int server_logs_level) @@ -625,9 +674,35 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { +#if USE_JEMALLOC + setJemallocBackgroundThreads(true); +#endif + Stopwatch startup_watch; Poco::Logger * log = &logger(); @@ -711,7 +786,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE @@ -1953,6 +2038,11 @@ try /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); + + /// Startup scripts can depend on the system log tables. + if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed) + global_context->setServerSetting("prepare_system_log_tables_on_startup", true); + /// After attaching system databases we can initialize system log. 
global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -2101,6 +2191,9 @@ try load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); + if (config().has("startup_scripts")) + loadStartupScripts(config(), global_context, log); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 44e7808babc..cee41861d70 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -29,48 +29,49 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); - object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration_.container, - configuration.getConnectionURL().toString()); + object_storage = std::make_unique( + "BackupReaderAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = allow_azure_native_copy; - settings = std::make_unique(settings_copy); + settings = object_storage->getSettings(); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; bool BackupReaderAzureBlobStorage::fileExists(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return object_storage->exists(StoredObject(key)); } UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); return object_metadata.size_bytes; } std::unique_ptr BackupReaderAzureBlobStorage::readFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; return std::make_unique( client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); 
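Both backup helpers in this file now derive every object key from the backup's `blob_path` via `fs::path(blob_path) / file_name`. One property of `std::filesystem::path::operator/` is worth spelling out: a relative right-hand operand is appended, but an absolute one replaces the left-hand side entirely, which is why keys are kept relative inside the container. A minimal standalone sketch (the `blob_path` value is hypothetical):

    #include <filesystem>
    #include <iostream>

    namespace fs = std::filesystem;

    int main()
    {
        const std::string blob_path = "backups/2024-06"; // hypothetical backup prefix

        // A relative name is appended under the prefix:
        std::cout << (fs::path(blob_path) / "meta.txt") << '\n';  // prints "backups/2024-06/meta.txt"

        // An absolute right-hand operand replaces the prefix entirely:
        std::cout << (fs::path(blob_path) / "/meta.txt") << '\n'; // prints "/meta.txt"
    }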
@@ -85,23 +86,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, && destination_data_source_description.is_encrypted == encrypted_in_backup) { LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); - auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t + auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional &) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. - if (blob_path.size() != 2 || mode != WriteMode::Rewrite) + if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite) throw Exception(ErrorCodes::LOGICAL_ERROR, "Blob writing function called with unexpected blob_path.size={} or mode={}", - blob_path.size(), mode); + dst_blob_path.size(), mode); copyAzureBlobStorageFile( client, destination_disk->getObjectStorage()->getAzureBlobStorageClient(), - configuration.container, - fs::path(configuration.blob_path) / path_in_backup, + connection_params.getContainer(), + fs::path(blob_path) / path_in_backup, 0, file_size, - /* dest_container */ blob_path[1], - /* dest_path */ blob_path[0], + /* dest_container */ dst_blob_path[1], + /* dest_path */ dst_blob_path[0], settings, read_settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupRDAzure")); @@ -119,28 +120,33 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false} - , configuration(configuration_) + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false} + , connection_params(connection_params_) + , blob_path(blob_path_) { - auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); - client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); + if (!attempt_to_create_container) + connection_params.endpoint.container_already_exists = true; + + auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false); + auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy); + + object_storage = std::make_unique( + "BackupWriterAzureBlobStorage", + std::move(client_ptr), + std::move(settings_ptr), + connection_params.getContainer(), + connection_params.getConnectionURL()); - object_storage = std::make_unique("BackupWriterAzureBlobStorage", - std::move(client_ptr), - configuration.createSettings(context_), - configuration.container, - configuration_.getConnectionURL().toString()); client = object_storage->getAzureBlobStorageClient(); - auto settings_copy = *object_storage->getSettings(); - settings_copy.use_native_copy = 
allow_azure_native_copy;
-    settings = std::make_unique(settings_copy);
+    settings = object_storage->getSettings();
 }
 
 void BackupWriterAzureBlobStorage::copyFileFromDisk(
@@ -159,18 +165,18 @@
     {
         /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
         /// In this case we can't use the native copy.
-        if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
+        if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2)
         {
             LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorage", src_path, src_disk->getName());
             copyAzureBlobStorageFile(
                 src_disk->getObjectStorage()->getAzureBlobStorageClient(),
                 client,
-                /* src_container */ blob_path[1],
-                /* src_path */ blob_path[0],
+                /* src_container */ src_blob_path[1],
+                /* src_path */ src_blob_path[0],
                 start_pos,
                 length,
-                configuration.container,
-                fs::path(configuration.blob_path) / path_in_backup,
+                connection_params.getContainer(),
+                fs::path(blob_path) / path_in_backup,
                 settings,
                 read_settings,
                 threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure"));
@@ -188,11 +194,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
     copyAzureBlobStorageFile(
         client,
         client,
-        configuration.container,
-        fs::path(configuration.blob_path)/ source,
+        connection_params.getContainer(),
+        fs::path(blob_path)/ source,
         0,
         size,
-        /* dest_container */ configuration.container,
+        /* dest_container */ connection_params.getContainer(),
         /* dest_path */ destination,
         settings,
         read_settings,
@@ -206,22 +212,28 @@ void BackupWriterAzureBlobStorage::copyDataToFile(
     UInt64 length)
 {
     copyDataToAzureBlobStorageFile(
-        create_read_buffer, start_pos, length, client, configuration.container,
-        fs::path(configuration.blob_path) / path_in_backup, settings,
-        threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure"));
+        create_read_buffer,
+        start_pos,
+        length,
+        client,
+        connection_params.getContainer(),
+        fs::path(blob_path) / path_in_backup,
+        settings,
+        threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(),
+        "BackupWRAzure"));
 }
 
 BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
 
 bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return object_storage->exists(StoredObject(key));
 }
 
 UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     RelativePathsWithMetadata children;
     object_storage->listObjects(key,children,/*max_keys*/0);
     if (children.empty())
@@ -231,7 +243,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
 
 std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return std::make_unique(
         client, key, read_settings, settings->max_single_read_retries, settings->max_single_download_retries);
@@ -239,7 +251,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String
 
 std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path)
/ file_name; return std::make_unique( client, key, @@ -251,7 +263,7 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin void BackupWriterAzureBlobStorage::removeFile(const String & file_name) { - String key = fs::path(configuration.blob_path) / file_name; + String key = fs::path(blob_path) / file_name; StoredObject object(key); object_storage->removeObjectIfExists(object); } @@ -260,7 +272,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); @@ -270,7 +282,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names) { StoredObjects objects; for (const auto & file_name : file_names) - objects.emplace_back(fs::path(configuration.blob_path) / file_name); + objects.emplace_back(fs::path(blob_path) / file_name); object_storage->removeObjectsIfExist(objects); } diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 61688107839..c3b88f245ab 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -1,12 +1,10 @@ #pragma once - #include "config.h" #if USE_AZURE_BLOB_STORAGE #include #include -#include -#include +#include namespace DB @@ -17,7 +15,8 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -40,16 +39,18 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - const StorageAzureConfiguration & configuration_, + const AzureBlobStorage::ConnectionParams & connection_params_, + const String & blob_path_, bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, @@ -87,9 +88,10 @@ private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureConfiguration configuration; + AzureBlobStorage::ConnectionParams connection_params; + String blob_path; std::unique_ptr object_storage; - std::shared_ptr settings; + std::shared_ptr settings; }; } diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 03d156d1009..626df99b00c 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -5,6 +5,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureConfiguration configuration; + String blob_path; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = 
AzureBlobStorage::getRequestSettings(params.context->getSettingsRef()); if (!id_arg.empty()) { @@ -59,55 +62,42 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (config.has(config_prefix + ".connection_string")) + connection_params = { - configuration.connection_url = config.getString(config_prefix + ".connection_string"); - configuration.is_connection_string = true; - configuration.container = config.getString(config_prefix + ".container"); - } - else - { - configuration.connection_url = config.getString(config_prefix + ".storage_account_url"); - configuration.is_connection_string = false; - configuration.container = config.getString(config_prefix + ".container"); - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - - if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key")) - { - configuration.account_name = config.getString(config_prefix + ".account_name"); - configuration.account_key = config.getString(config_prefix + ".account_key"); - } - } + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true), + }; if (args.size() > 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.setPath(args[0].safeGet()); - + blob_path = args[0].safeGet(); } else { if (args.size() == 3) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = !configuration.connection_url.starts_with("http"); + auto connection_url = args[0].safeGet(); + auto container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else if (args.size() == 5) { - configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = false; + connection_params.endpoint.storage_account_url = args[0].safeGet(); + connection_params.endpoint.container_name = args[1].safeGet(); + blob_path = args[2].safeGet(); - configuration.container = args[1].safeGet(); - configuration.blob_path = args[2].safeGet(); - configuration.account_name = args[3].safeGet(); - configuration.account_key = args[4].safeGet(); + auto account_name = args[3].safeGet(); + auto account_key = args[4].safeGet(); + connection_params.auth_method = std::make_shared(account_name, account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true); } else { @@ -117,16 +107,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.getPath())) + if (hasRegisteredArchiveFileExtension(blob_path)) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is 
disabled"); - auto path = configuration.getPath(); - auto filename = removeFileNameFromURL(path); - configuration.setPath(path); - - archive_params.archive_name = filename; + archive_params.archive_name = removeFileNameFromURL(blob_path); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; @@ -141,7 +127,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { auto reader = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, @@ -159,7 +146,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) else { auto writer = std::make_shared( - configuration, + connection_params, + blob_path, params.allow_azure_native_copy, params.read_settings, params.write_settings, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b18207e55ad..d985595154c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch) dbms_target_link_libraries(PUBLIC ch_contrib::usearch) endif() +if (TARGET ch_contrib::prometheus_protobufs) + dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs) +endif() + if (TARGET ch_rust::skim) dbms_target_include_directories(PRIVATE $) dbms_target_link_libraries(PUBLIC ch_rust::skim) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 30dc4168996..a260fd5761e 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,13 +6,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 8c993f906e0..51cbe6f3d6f 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -195,6 +195,12 @@ void HedgedConnections::sendQuery( modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. + modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 5d0fc8fd39e..99bdd706d8b 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery( } } + /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting. + /// Make the analyzer being set, so it will be effectively applied on the remote server. + /// In other words, the initiator always controls whether the analyzer enabled or not for + /// all servers involved in the distributed query processing. 
+ modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..90ef974010c 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,10 +1093,4 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } -void ColumnObject::updateHashFast(SipHash & hash) const -{ - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); -} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..e2936b27994 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; + void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..f262a8676b7 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c ColumnPtr ColumnTuple::compress() const { if (columns.empty()) - return Ptr(); + { + return ColumnCompressed::create(size(), 0, + [n = column_length] + { + return ColumnTuple::create(n); + }); + } size_t byte_size = 0; Columns compressed; diff --git a/src/Common/BinStringDecodeHelper.h b/src/Common/BinStringDecodeHelper.h index df3e014cfad..03c175fd37f 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -5,7 +5,7 @@ namespace DB { -static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if ((end - pos) & 1) { @@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o ++out; } -static void inline binStringDecode(const char * pos, const char * end, char *& out) +static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if (pos == end) { @@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o ++out; } - assert((end - pos) % 8 == 0); + chassert((end - pos) % word_size == 0); while (end - pos != 0) { diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 8a4792f0a5a..d36c7fd08aa 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -12,7 +12,9 @@ #include #include +#include #include +#include #include #include "config.h" @@ -22,24 +24,169 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) #endif +using namespace DB; namespace DB { namespace ErrorCodes { - extern const int 
CANNOT_CLOSE_FILE;
-    extern const int CANNOT_OPEN_FILE;
-    extern const int FILE_DOESNT_EXIST;
-    extern const int INCORRECT_DATA;
+extern const int FILE_DOESNT_EXIST;
+extern const int INCORRECT_DATA;
 }
 
-CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_)
-    : log(getLogger("CgroupsMemoryUsageObserver"))
-    , wait_time(wait_time_)
-    , memory_usage_file(log)
+}
+
+namespace
 {
-    LOG_INFO(log, "Initialized cgroups memory limit observer, wait time is {} sec", wait_time.count());
+
+/// Format is
+/// kernel 5
+/// rss 15
+/// [...]
+uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key)
+{
+    while (!buf.eof())
+    {
+        std::string current_key;
+        readStringUntilWhitespace(current_key, buf);
+        if (current_key != key)
+        {
+            std::string dummy;
+            readStringUntilNewlineInto(dummy, buf);
+            buf.ignore();
+            continue;
+        }
+
+        assertChar(' ', buf);
+        uint64_t value = 0;
+        readIntText(value, buf);
+        return value;
+    }
+
+    throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName());
+}
+
+struct CgroupsV1Reader : ICgroupsReader
+{
+    explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { }
+
+    uint64_t readMemoryUsage() override
+    {
+        std::lock_guard lock(mutex);
+        buf.rewind();
+        return readMetricFromStatFile(buf, "rss");
+    }
+
+private:
+    std::mutex mutex;
+    ReadBufferFromFile buf TSA_GUARDED_BY(mutex);
+};
+
+struct CgroupsV2Reader : ICgroupsReader
+{
+    explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir)
+        : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat")
+    {
+    }
+
+    uint64_t readMemoryUsage() override
+    {
+        std::lock_guard lock(mutex);
+        current_buf.rewind();
+        stat_buf.rewind();
+
+        int64_t mem_usage = 0;
+        /// memory.current contains a single number
+        /// the reason why we subtract it is described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
+        readIntText(mem_usage, current_buf);
+        mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file");
+        chassert(mem_usage >= 0, "Negative memory usage");
+        return mem_usage;
+    }
+
+private:
+    std::mutex mutex;
+    ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex);
+    ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex);
+};
+
+/// Caveats:
+/// - All of the logic in this file assumes that the current process is the only process in the
+///   containing cgroup (or more precisely: the only process with significant memory consumption).
+///   If this is not the case, then other processes' memory consumption may affect the internal
+///   memory tracker ...
+/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a
+///   decade and will go away at some point, hierarchical detection is only implemented for v2.
+/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such
+///   systems existed only for a short transition period.
+
+std::optional getCgroupsV2Path()
+{
+    if (!cgroupsV2Enabled())
+        return {};
+
+    if (!cgroupsV2MemoryControllerEnabled())
+        return {};
+
+    String cgroup = cgroupV2OfProcess();
+    auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
+
+    /// Return the bottom-most nested current memory file. If there is no such file at the current
+    /// level, try again at the parent level as memory settings are inherited.
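+    /// Example walk (hypothetical layout): for a process in
+    /// /sys/fs/cgroup/system.slice/clickhouse.service the loop below probes
+    ///     /sys/fs/cgroup/system.slice/clickhouse.service/memory.{current,stat}
+    ///     /sys/fs/cgroup/system.slice/memory.{current,stat}
+    ///     /sys/fs/cgroup/memory.{current,stat}
+    /// and returns the first directory containing both files.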
+ while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +} + +std::optional getCgroupsV1Path() +{ + auto path = default_cgroups_mount / "memory/memory.stat"; + if (!std::filesystem::exists(path)) + return {}; + return {default_cgroups_mount / "memory"}; +} + +std::pair getCgroupsPath() +{ + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); +} + +} + +namespace DB +{ + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) +{ + const auto [cgroup_path, version] = getCgroupsPath(); + + if (version == CgroupsVersion::V2) + cgroup_reader = std::make_unique(cgroup_path); + else + cgroup_reader = std::make_unique(cgroup_path); + + LOG_INFO( + log, + "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2", + wait_time.count()); } CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() @@ -79,12 +226,13 @@ void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint { LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); -#if USE_JEMALLOC +# if USE_JEMALLOC LOG_INFO(log, "Purging jemalloc arenas"); mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -#endif +# endif /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); MemoryTracker::setRSS(memory_usage, 0); LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); @@ -104,152 +252,6 @@ void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmo on_memory_amount_available_changed = on_memory_amount_available_changed_; } -namespace -{ - -/// Caveats: -/// - All of the logic in this file assumes that the current process is the only process in the -/// containing cgroup (or more precisely: the only process with significant memory consumption). -/// If this is not the case, then other processe's memory consumption may affect the internal -/// memory tracker ... -/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a -/// decade and will go away at some point, hierarchical detection is only implemented for v2. -/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such -/// systems existed only for a short transition period. 
- -std::optional getCgroupsV2FileName() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - String cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); - - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - auto path = current_cgroup / "memory.current"; - if (std::filesystem::exists(path)) - return {path}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - -std::optional getCgroupsV1FileName() -{ - auto path = default_cgroups_mount / "memory/memory.stat"; - if (!std::filesystem::exists(path)) - return {}; - return {path}; -} - -std::pair getCgroupsFileName() -{ - auto v2_file_name = getCgroupsV2FileName(); - if (v2_file_name.has_value()) - return {*v2_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; - - auto v1_file_name = getCgroupsV1FileName(); - if (v1_file_name.has_value()) - return {*v1_file_name, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; - - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); -} - -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::MemoryUsageFile(LoggerPtr log_) - : log(log_) -{ - std::tie(file_name, version) = getCgroupsFileName(); - - LOG_INFO(log, "Will read the current memory usage from '{}' (cgroups version: {})", file_name, (version == CgroupsVersion::V1) ? "v1" : "v2"); - - fd = ::open(file_name.data(), O_RDONLY); - if (fd == -1) - ErrnoException::throwFromPath( - (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE, - file_name, "Cannot open file '{}'", file_name); -} - -CgroupsMemoryUsageObserver::MemoryUsageFile::~MemoryUsageFile() -{ - assert(fd != -1); - if (::close(fd) != 0) - { - try - { - ErrnoException::throwFromPath( - ErrorCodes::CANNOT_CLOSE_FILE, - file_name, "Cannot close file '{}'", file_name); - } - catch (const ErrnoException &) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } - } -} - -uint64_t CgroupsMemoryUsageObserver::MemoryUsageFile::readMemoryUsage() const -{ - /// File read is probably not read is thread-safe, just to be sure - std::lock_guard lock(mutex); - - ReadBufferFromFileDescriptor buf(fd); - buf.rewind(); - - uint64_t mem_usage = 0; - - switch (version) - { - case CgroupsVersion::V1: - { - /// Format is - /// kernel 5 - /// rss 15 - /// [...] 
- std::string key; - bool found_rss = false; - - while (!buf.eof()) - { - readStringUntilWhitespace(key, buf); - if (key != "rss") - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - readIntText(mem_usage, buf); - found_rss = true; - break; - } - - if (!found_rss) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find 'rss' in '{}'", file_name); - - break; - } - case CgroupsVersion::V2: - { - readIntText(mem_usage, buf); - break; - } - } - - LOG_TRACE(log, "Read current memory usage {} from cgroups", ReadableSize(mem_usage)); - - return mem_usage; -} - void CgroupsMemoryUsageObserver::startThread() { if (!thread.joinable()) @@ -301,7 +303,8 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard limit_lock(limit_mutex); if (soft_limit > 0 && hard_limit > 0) { - uint64_t memory_usage = memory_usage_file.readMemoryUsage(); + uint64_t memory_usage = cgroup_reader->readMemoryUsage(); + LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); if (memory_usage > hard_limit) { if (last_memory_usage <= hard_limit) diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index edc1cee750a..b848a2bff3c 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,11 +3,19 @@ #include #include +#include #include namespace DB { +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -61,27 +69,12 @@ private: uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use - /// Represents the cgroup virtual file that shows the memory consumption of the process's cgroup. - struct MemoryUsageFile - { - public: - explicit MemoryUsageFile(LoggerPtr log_); - ~MemoryUsageFile(); - uint64_t readMemoryUsage() const; - private: - LoggerPtr log; - mutable std::mutex mutex; - int fd TSA_GUARDED_BY(mutex) = -1; - CgroupsVersion version; - std::string file_name; - }; - - MemoryUsageFile memory_usage_file; - void stopThread(); void runThread(); + std::unique_ptr cgroup_reader; + std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h deleted file mode 100644 index 97c0c3fbc06..00000000000 --- a/src/Common/CollectionOfDerived.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* This is a collections of objects derived from ItemBase. -* Collection contains no more than one instance for each derived type. -* The derived type is used to access the instance. 
-*/ - -template -class CollectionOfDerivedItems -{ -public: - using Self = CollectionOfDerivedItems; - using ItemPtr = std::shared_ptr; - -private: - struct Rec - { - std::type_index type_idx; - ItemPtr ptr; - - bool operator<(const Rec & other) const - { - return type_idx < other.type_idx; - } - - bool operator<(const std::type_index & value) const - { - return type_idx < value; - } - - bool operator==(const Rec & other) const - { - return type_idx == other.type_idx; - } - }; - using Records = std::vector; - -public: - void swap(Self & other) noexcept - { - records.swap(other.records); - } - - void clear() - { - records.clear(); - } - - bool empty() const - { - return records.empty(); - } - - size_t size() const - { - return records.size(); - } - - Self clone() const - { - Self result; - result.records.reserve(records.size()); - for (const auto & rec : records) - result.records.emplace_back(rec.type_idx, rec.ptr->clone()); - return result; - } - - void append(Self && other) - { - auto middle_idx = records.size(); - std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); - chassert(isUniqTypes()); - } - - template - void add(std::shared_ptr info) - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - return addImpl(std::type_index(typeid(T)), std::move(info)); - } - - template - std::shared_ptr get() const - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - return cast; - } - - template - std::shared_ptr extract() - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - - records.erase(it); - return cast; - } - - std::string debug() const - { - std::string result; - - for (auto & rec : records) - { - result.append(rec.type_idx.name()); - result.append(" "); - } - - return result; - } - -private: - bool isUniqTypes() const - { - auto uniq_it = std::adjacent_find(records.begin(), records.end()); - - return uniq_it == records.end(); - } - - void addImpl(std::type_index type_idx, ItemPtr item) - { - auto it = std::lower_bound(records.begin(), records.end(), type_idx); - - if (it == records.end()) - { - records.emplace_back(type_idx, item); - return; - } - - if (it->type_idx == type_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); - - - records.emplace(it, type_idx, item); - - chassert(isUniqTypes()); - } - - Records::const_iterator getImpl(std::type_index type_idx) const - { - auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); - - if (it == records.cend()) - return records.cend(); - - if (it->type_idx != type_idx) - return records.cend(); - - return it; - } - - Records records; -}; - -} diff --git a/src/Common/GWPAsan.cpp b/src/Common/GWPAsan.cpp index 488f8e2c5dc..48fbd07ec34 100644 --- a/src/Common/GWPAsan.cpp +++ b/src/Common/GWPAsan.cpp @@ -57,9 +57,12 @@ static bool guarded_alloc_initialized = [] opts.MaxSimultaneousAllocations = 1024; if (!env_options_raw || !std::string_view{env_options_raw}.contains("SampleRate")) - 
opts.SampleRate = 50000; + opts.SampleRate = 10000; + + const char * collect_stacktraces = std::getenv("GWP_ASAN_COLLECT_STACKTRACES"); // NOLINT(concurrency-mt-unsafe) + if (collect_stacktraces && std::string_view{collect_stacktraces} == "1") + opts.Backtrace = getBackTrace; - opts.Backtrace = getBackTrace; GuardedAlloc.init(opts); return true; diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index fbe2f62c944..d7cc246db6a 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -46,6 +46,20 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); @@ -58,7 +72,7 @@ void setJemallocProfileActive(bool value) return; } - mallctl("prof.active", nullptr, nullptr, &value, sizeof(bool)); + setJemallocValue("prof.active", value); LOG_TRACE(getLogger("SystemJemalloc"), "Profiling is {}", value ? "enabled" : "disabled"); } @@ -84,6 +98,16 @@ std::string flushJemallocProfile(const std::string & file_prefix) return profile_dump_path; } +void setJemallocBackgroundThreads(bool enabled) +{ + setJemallocValue("background_thread", enabled); +} + +void setJemallocMaxBackgroundThreads(size_t max_threads) +{ + setJemallocValue("max_background_threads", max_threads); +} + } #endif diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 80ff0f1a319..499a906fd3d 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -17,6 +17,10 @@ void setJemallocProfileActive(bool value); std::string flushJemallocProfile(const std::string & file_prefix); +void setJemallocBackgroundThreads(bool enabled); + +void setJemallocMaxBackgroundThreads(size_t max_threads); + } #endif diff --git a/src/Client/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp similarity index 97% rename from src/Client/QueryFuzzer.cpp rename to src/Common/QueryFuzzer.cpp index f5b700ea529..161c38f20e0 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))]; + return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], - static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], + static_cast(scales[fuzz_rand() % std::size(scales)]) ); } default: @@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - 
std::cerr << "erased\n"; + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - std::cerr << "erased\n"; + + if (debug_stream) + *debug_stream << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - std::cerr << fmt::format("inserted (pos {})\n", pos); + + if (debug_stream) + *debug_stream << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - std::cerr << "inserted (0)\n"; + + if (debug_stream) + *debug_stream << "inserted (0)\n"; } } @@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } } @@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - std::cerr << "No random column.\n"; + if (debug_stream) + *debug_stream << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - std::cout << std::endl; - WriteBufferFromOStream ast_buf(std::cout, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - std::cout << std::endl << std::endl; + if (out_stream) + { + *out_stream << std::endl; + + WriteBufferFromOStream ast_buf(*out_stream, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + *out_stream << std::endl << std::endl; + } } } diff --git a/src/Client/QueryFuzzer.h b/src/Common/QueryFuzzer.h similarity index 91% rename from src/Client/QueryFuzzer.h rename to src/Common/QueryFuzzer.h index 6165e589cae..35d088809f2 100644 --- a/src/Client/QueryFuzzer.h +++ b/src/Common/QueryFuzzer.h @@ -35,9 +35,31 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. */ -struct QueryFuzzer +class QueryFuzzer { - pcg64 fuzz_rand{randomSeed()}; +public: + explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) + : fuzz_rand(fuzz_rand_) + , out_stream(out_stream_) + , debug_stream(debug_stream_) + { + } + + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. 
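+    //
+    // Minimal driver sketch (hypothetical; assumes `ast` was produced by a parser
+    // and that out/debug streams were passed to the constructor above):
+    //
+    //     pcg64 rng(42);
+    //     QueryFuzzer fuzzer(rng, &std::cout, &std::cerr);
+    //     for (int i = 0; i < 10; ++i)
+    //         fuzzer.fuzzMain(ast);   // mutates `ast` in place, prints it to out_stream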
+ void fuzzMain(ASTPtr & ast); + + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); + +private: + pcg64 fuzz_rand; + + std::ostream * out_stream = nullptr; + std::ostream * debug_stream = nullptr; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -66,10 +88,6 @@ struct QueryFuzzer std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - // Various helper functions follow, normally you shouldn't have to call them. Field getRandomField(int type); Field fuzzField(Field field); @@ -77,9 +95,6 @@ struct QueryFuzzer ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -102,8 +117,6 @@ struct QueryFuzzer void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Common/config.h.in b/src/Common/config.h.in index ad2ca2652d1..f68701d5d10 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -63,6 +63,7 @@ #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT +#cmakedefine01 USE_PROMETHEUS_PROTOBUFS /// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. /// That's why we use absolute paths. 
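The refactor above converts QueryFuzzer from a struct with hard-wired `std::cerr`/`std::cout` output into a class that writes only to caller-supplied `out_stream`/`debug_stream` pointers, so the fuzzer (now in src/Common) can run silently when embedded outside clickhouse-client. A minimal, self-contained sketch of that pattern; the class and method names here are illustrative, not the actual ClickHouse API:

```cpp
#include <iostream>

// Sketch of the optional-stream pattern: diagnostics and results are emitted
// only when the caller supplied a stream, so the default construction is silent.
class FuzzerSketch
{
public:
    explicit FuzzerSketch(std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
        : out_stream(out_stream_), debug_stream(debug_stream_)
    {
    }

    void fuzzStep()
    {
        if (debug_stream)
            *debug_stream << "erased\n";      // debug chatter, off by default
        if (out_stream)
            *out_stream << "SELECT 1\n";      // the fuzzed query itself
    }

private:
    std::ostream * out_stream = nullptr;
    std::ostream * debug_stream = nullptr;
};

int main()
{
    FuzzerSketch silent;                           // no streams: no output at all
    silent.fuzzStep();

    FuzzerSketch verbose(&std::cout, &std::cerr);  // client-like configuration
    verbose.fuzzStep();
}
```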
diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index bb83e0381ef..abeda95ed0d 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 2e8dbe75e90..c9b45d9a344 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 82b30a0b5f6..2a53bade62f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,6 +2,7 @@ #include "config.h" +#include #include #include #include diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 68ac45fa24f..ad29d59e4a3 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,7 +153,8 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ + M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often ClickHouse will reload the config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a343d864db..edb095f640e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -125,6 +125,9 @@ class IColumn; M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(UInt64, azure_sdk_max_retries, 10, "Maximum number of retries in the Azure SDK", 0) \ + M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, "Minimum backoff between retries in the Azure SDK", 0) \ + M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, "Maximum backoff between retries in the Azure SDK", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network.
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ @@ -399,7 +402,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enabled, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ @@ -631,8 +634,9 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`.
It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -758,6 +762,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ + M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ @@ -949,7 +954,6 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6af6b4b15aa..886ca0fa246 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -63,6 +63,10 @@ static std::initializer_list #include #include +#include #include #include #include +#include #include #include #include @@ -110,28 +112,58 @@ std::unique_ptr DataTypeDynamic::getDynamicSubcolumnDa } /// Extract nested subcolumn of requested dynamic subcolumn if needed. - if (!subcolumn_nested_name.empty()) + /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet. + bool is_null_map_subcolumn = subcolumn_nested_name == "null"; + if (is_null_map_subcolumn) + { + res->type = std::make_shared(); + } + else if (!subcolumn_nested_name.empty()) { res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); if (!res) return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); - res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). 
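The `.null` subcolumn support added here lets the null map for `v.Type.null` be produced straight from the Variant discriminators, without materializing the Nullable subcolumn first; the code just below implements this. A hedged sketch of that computation on plain vectors (the types and names are simplified stand-ins for ClickHouse's column classes):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

using Discriminator = uint8_t;
constexpr Discriminator NULL_DISCRIMINATOR = 255;

// Null map of the subcolumn `v.T.null`: 1 for every row whose discriminator
// is not the requested variant (including rows that are NULL in the Variant).
std::vector<uint8_t> buildNullMap(const std::vector<Discriminator> & discrs, Discriminator target)
{
    std::vector<uint8_t> null_map(discrs.size(), 1);
    for (size_t i = 0; i < discrs.size(); ++i)
        if (discrs[i] == target)
            null_map[i] = 0;
    return null_map;
}

int main()
{
    // Rows: variant 0, NULL, variant 1, NULL.
    std::vector<Discriminator> discrs{0, NULL_DISCRIMINATOR, 1, NULL_DISCRIMINATOR};
    auto null_map = buildNullMap(discrs, /*target=*/1);
    assert((null_map == std::vector<uint8_t>{1, 1, 0, 1}));
}
```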
+ bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); + if (!is_null_map_subcolumn && make_subcolumn_nullable) + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) { if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to /// create full subcolumn from variant according to discriminators. const auto & variant_column = assert_cast(*data.column).getVariantColumn(); - auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); - res->column = creator.create(res->column); + std::unique_ptr creator; + if (is_null_map_subcolumn) + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator)); + else + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator), + make_subcolumn_nullable); + res->column = creator->create(res->column); + } + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + else if (is_null_map_subcolumn) + { + /// Fill null map with 1 when there is no such Dynamic subcolumn. + auto column = ColumnUInt8::create(); + assert_cast(*column).getData().resize_fill(data.column->size(), 1); + res->column = std::move(column); } else { - /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. auto column = res->type->createColumn(); column->insertManyDefaults(data.column->size()); res->column = std::move(column); diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1c9715bbf53..1cb64b65d3a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) { - has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData(); }; forEachSubcolumn(callback, data); return has_dynamic_subcolumns; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..7642a6619b3 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const if (type == VariantElement) return fmt::format("VariantElement({})", variant_element_name); + if (type == VariantElementNullMap) + return fmt::format("VariantElementNullMap({}.null)", variant_element_name); + return String(magic_enum::enum_name(type)); } @@ -195,6 +198,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == Substream::VariantElementNullMap) + stream_name += "." 
+ it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; } @@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref return path[last_elem].type == Substream::NullMap || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes - || path[last_elem].type == Substream::VariantElement; + || path[last_elem].type == Substream::VariantElement + || path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 914ff9cf4a2..6007eca94d4 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -184,6 +184,7 @@ public: VariantOffsets, VariantElements, VariantElement, + VariantElementNullMap, DynamicData, DynamicStructure, @@ -256,6 +257,8 @@ public: bool position_independent_encoding = true; + bool use_compact_variant_discriminators_serialization = false; + enum class DynamicStatisticsMode { NONE, /// Don't write statistics. @@ -434,6 +437,9 @@ protected: template State * checkAndGetState(const StatePtr & state) const; + template + static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization); + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; @@ -444,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type; template State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + return checkAndGetState(state, this); +} + +template +State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization) { if (!state) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Got empty state for {}", demangle(typeid(*this).name())); + "Got empty state for {}", demangle(typeid(*serialization).name())); auto * state_concrete = typeid_cast(state.get()); if (!state_concrete) @@ -455,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const auto & state_ref = *state; throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid State for {}. 
Expected: {}, got {}", - demangle(typeid(*this).name()), + demangle(typeid(*serialization).name()), demangle(typeid(State).name()), demangle(typeid(state_ref).name())); } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index dafd6d663b0..211f0ac9377 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + if (is_null_map_subcolumn) + dynamic_element_state->variant_serialization = std::make_shared(dynamic_element_name, *global_discr); + else + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } @@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( SubstreamsCache * cache) const { if (!state) + { + if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } + return; + } auto * dynamic_element_state = checkAndGetState(state); @@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } + else if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } else { auto mutable_column = result_column->assumeMutable(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 2ddc3324139..127d14a55e0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,11 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. 
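The `checkAndGetState` change above adds a static overload that takes the serialization pointer explicitly, so one serialization can validate a state that logically belongs to another (SerializationVariantElement uses this further below). A simplified, self-contained sketch of the downcast-and-validate idiom; it uses `dynamic_cast` where ClickHouse uses `typeid_cast`, and all names are illustrative:

```cpp
#include <memory>
#include <stdexcept>
#include <string>
#include <typeinfo>

struct DeserializeState { virtual ~DeserializeState() = default; };
using StatePtr = std::shared_ptr<DeserializeState>;

struct VariantState : DeserializeState { int mode = 0; };

// Downcast a shared base-state pointer to the concrete state type, throwing
// a descriptive logical error on a null pointer or a type mismatch.
template <typename Concrete>
Concrete * checkAndGetState(const StatePtr & state)
{
    if (!state)
        throw std::logic_error("Got empty state");
    auto * concrete = dynamic_cast<Concrete *>(state.get());
    if (!concrete)
        throw std::logic_error(std::string("Invalid state, got ") + typeid(*state).name());
    return concrete;
}

int main()
{
    StatePtr state = std::make_shared<VariantState>();
    checkAndGetState<VariantState>(state)->mode = 1;  // ok: correct concrete type
}
```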
String dynamic_element_name; + bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) - : SerializationWrapper(nested_) - , dynamic_element_name(dynamic_element_name_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index b386fd8ab45..e4d71e84cc7 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -30,12 +31,18 @@ namespace ErrorCodes struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { - std::vector states; + explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; + std::vector variant_states; }; struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { - std::vector states; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; + std::vector variant_states; }; void SerializationVariant::enumerateStreams( @@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams( for (size_t i = 0; i < variants.size(); ++i) { - settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; + settings.path.back().creator = std::make_shared( + local_discriminators, + variant_names[i], + i, + column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, + !type || type->canBeInsideNullable() || type->lowCardinality()); auto variant_data = SubstreamData(variants[i]) - .withType(type_variant ? type_variant->getVariant(i) : nullptr) + .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } + /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null. + /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable + /// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization + /// SerializationVariantElementNullMap. + auto null_map_data = SubstreamData(std::make_shared>()) + .withType(type_variant ? std::make_shared() : nullptr) + .withColumn(column_variant ? 
ColumnUInt8::create() : nullptr); + + for (size_t i = 0; i < variants.size(); ++i) + { + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + settings.path.push_back(Substream::VariantElementNullMap); + settings.path.back().variant_element_name = variant_names[i]; + settings.path.back().data = null_map_data; + callback(settings.path); + settings.path.pop_back(); + } + settings.path.pop_back(); } @@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - const ColumnVariant & col = assert_cast(column); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); - auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + if (!discriminators_stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix"); + + UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC; + writeBinaryLittleEndian(mode, *discriminators_stream); + + const ColumnVariant & col = assert_cast(column); + auto variant_state = std::make_shared(mode); + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + variant_state->discriminators_state = discriminators_state; + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } @@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( state = std::move(variant_state); } +ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) 
+{ + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStatePtr discriminators_state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared<DeserializeBinaryBulkStateVariantDiscriminators>(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); + } + + settings.path.pop_back(); + return discriminators_state; +} + void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, @@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state); - /// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate + /// Don't write anything if column is empty. + if (limit == 0) + return; + + /// Write number of rows in this granule in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeVarUInt(UInt64(limit), *discriminators_stream); + + /// If the column has only one non-empty variant and no NULLs, we don't need to + /// calculate limits for the variants and can use the provided offset/limit. + if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); + + /// In compact mode write the format of the granule and the single non-empty discriminator. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + /// For basic mode just serialize this discriminator limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + + settings.path.push_back(Substream::VariantElements); + addVariantElementToPath(settings.path, non_empty_global_discr); + /// We can use the same offset/limit as for the whole Variant column + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; + settings.path.pop_back(); + settings.path.pop_back(); + return; + } + /// If column has only NULLs, just serialize NULL discriminators. + else if (col.hasOnlyNulls()) + { + /// In compact mode write single NULL_DISCRIMINATOR. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// In basic mode write NULL_DISCRIMINATOR limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + return; + } + + /// If offset = 0 and limit == col.size() we don't need to calculate /// offsets and limits for variants and need to just serialize whole columns.
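To make the compact layout concrete: every granule is written as a varint row count, a one-byte format, and then either a single discriminator (COMPACT) or the discriminators row by row (PLAIN), matching the fast paths above. A hedged writer sketch over a raw byte buffer; the helper names are mine, and the varint is generic LEB128 standing in for ClickHouse's writeVarUInt:

```cpp
#include <cstdint>
#include <vector>

enum GranuleFormat : uint8_t { PLAIN = 0, COMPACT = 1 };

static void writeVarUInt(uint64_t x, std::vector<uint8_t> & out)
{
    while (x >= 0x80)
    {
        out.push_back(static_cast<uint8_t>(x | 0x80));
        x >>= 7;
    }
    out.push_back(static_cast<uint8_t>(x));
}

// One granule: <row count (varint)><format (1 byte)><single discriminator | row-by-row payload>.
void writeGranule(const std::vector<uint8_t> & discrs, std::vector<uint8_t> & out)
{
    writeVarUInt(discrs.size(), out);

    bool all_same = !discrs.empty();
    for (uint8_t d : discrs)
        all_same = all_same && (d == discrs.front());

    if (all_same)
    {
        out.push_back(COMPACT);
        out.push_back(discrs.front());                       // one value covers the granule
    }
    else
    {
        out.push_back(PLAIN);
        out.insert(out.end(), discrs.begin(), discrs.end()); // as is, row by row
    }
}

int main()
{
    std::vector<uint8_t> out;
    writeGranule({3, 3, 3, 3}, out);  // single-discriminator granule -> COMPACT
    writeGranule({0, 1, 0}, out);     // mixed discriminators -> PLAIN
}
```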
- if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls()) + if ((offset == 0 && limit == col.size())) { /// First, serialize discriminators. - /// If we have only NULLs or local and global discriminators are the same, just serialize the column as is. - if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder()) + /// Here we are sure that column contains different discriminators, use plain granule format in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + + /// If local and global discriminators are the same, just serialize the column as is. + if (col.hasGlobalVariantsOrder()) { SerializationNumber().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit); } @@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } @@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian return; } - /// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant. - if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// First, serialize discriminators. - /// We know that all discriminators are the same, so we just need to serialize this discriminator limit times. - auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); - for (size_t i = 0; i != limit; ++i) - writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); - - /// Second, serialize non-empty variant (other variants are empty and we can skip their serialization). - settings.path.push_back(Substream::VariantElements); - addVariantElementToPath(settings.path, non_empty_global_discr); - /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); - variants_statistics[variant_names[non_empty_global_discr]] += limit; - settings.path.pop_back(); - settings.path.pop_back(); - return; - } - /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. 
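A condensed sketch of the general-case computation implemented just below: a single pass over the discriminators yields, for each variant, the starting row inside that variant's column and the number of rows it contributes to the serialized range (plain vectors stand in for the actual column API):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

constexpr uint8_t NULL_DISCRIMINATOR = 255;

// For rows [offset, offset + limit): the first time a discriminator is seen,
// record where its values start inside the variant column; every occurrence
// bumps that variant's limit.
std::vector<std::pair<size_t, size_t>> variantRanges(
    const std::vector<uint8_t> & discrs,
    const std::vector<size_t> & offsets,   // row index of each value inside its variant column
    size_t num_variants, size_t offset, size_t limit)
{
    std::vector<std::pair<size_t, size_t>> ranges(num_variants, {0, 0});
    for (size_t i = offset; i < offset + limit; ++i)
    {
        uint8_t d = discrs[i];
        if (d == NULL_DISCRIMINATOR)
            continue;
        if (ranges[d].second == 0)
            ranges[d].first = offsets[i];
        ++ranges[d].second;
    }
    return ranges;
}

int main()
{
    std::vector<uint8_t> discrs{0, 1, NULL_DISCRIMINATOR, 0};
    std::vector<size_t> offsets{0, 0, 0, 1};  // second value of variant 0 sits at variant row 1
    auto ranges = variantRanges(discrs, offsets, 2, 0, 4);
    std::printf("variant 0: offset=%zu limit=%zu\n", ranges[0].first, ranges[0].second);
}
```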
const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); size_t end = offset + limit; + size_t num_non_empty_variants_in_range = 0; + ColumnVariant::Discriminator last_non_empty_variant_discr = 0; for (size_t i = offset; i < end; ++i) { auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]); - writeBinaryLittleEndian(global_discr, *discriminators_stream); - if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) { /// If we see this discriminator for the first time, update offset @@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[global_discr].first = offsets[i]; /// Update limit for this discriminator. ++variant_offsets_and_limits[global_discr].second; + ++num_non_empty_variants_in_range; + last_non_empty_variant_discr = global_discr; } } + /// In basic mode just serialize discriminators as is row by row. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + { + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// In compact mode check if we have the same discriminator for all rows in this granule. + /// First, check if all values in granule are NULLs. + else if (num_non_empty_variants_in_range == 0) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// Then, check if there is only 1 variant and no NULLs in this granule. + else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream); + } + /// Otherwise there are different discriminators in this granule. + else + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) @@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[i].first, variant_offsets_and_limits[i].second, settings, - variant_state->states[i]); + variant_state->variant_states[i]); variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } @@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// First, deserialize discriminators. 
settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariant * variant_state = nullptr; + std::vector<size_t> variant_limits; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state); col.getLocalDiscriminatorsPtr() = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state); + auto * discriminators_state = checkAndGetState<DeserializeBinaryBulkStateVariantDiscriminators>(variant_state->discriminators_state); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) + SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state); - SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } + /// It may happen that there is no such stream, in this case just do nothing. + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Second, calculate limits for each variant by iterating through new discriminators. - std::vector<size_t> variant_limits(variants.size(), 0); - auto & discriminators_data = col.getLocalDiscriminators(); - size_t discriminators_offset = discriminators_data.size() - limit; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + /// Second, calculate limits for each variant by iterating through new discriminators + /// if we didn't do it during discriminators deserialization. + if (variant_limits.empty()) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - ++variant_limits[discr]; + variant_limits.resize(variants.size(), 0); + auto & discriminators_data = col.getLocalDiscriminators(); + + /// We may actually read fewer than `limit` discriminators, and we cannot determine the actual number of read + /// rows from the discriminators column, as it could have been taken from the substreams cache. We need the actual + /// number of read rows to fill offsets correctly later if they are not in the cache. We can determine whether the + /// offsets column is in the cache by comparing its size with the discriminators column size (they are equal when + /// offsets are in the cache). If offsets are not in the cache, we can use its size to determine the actual number of read rows. + size_t num_new_discriminators = limit; + size_t offsets_size = col.getOffsetsPtr()->size(); + if (discriminators_data.size() > offsets_size) + num_new_discriminators = discriminators_data.size() - offsets_size; + size_t discriminators_offset = discriminators_data.size() - num_new_discriminators; + + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } } /// Now we can deserialize variants according to their limits.
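The cache arithmetic described above reduces to one invariant: when the offsets column was not cached, the discriminators column is ahead of it by exactly the number of newly read rows; when both came from the cache, their sizes match and the requested limit is used. A tiny sketch of that arithmetic (hypothetical helper name):

```cpp
#include <cassert>
#include <cstddef>

// Number of newly read rows, recovered from the size difference between the
// discriminators column and the offsets column; falls back to `limit` when
// the sizes match (i.e. offsets were already in the substreams cache).
size_t numNewRows(size_t discriminators_size, size_t offsets_size, size_t limit)
{
    return discriminators_size > offsets_size ? discriminators_size - offsets_size : limit;
}

int main()
{
    assert(numNewRows(100, 90, 10) == 10);   // offsets not cached: 100 - 90
    assert(numNewRows(100, 100, 10) == 10);  // both cached: use the limit
}
```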
- auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } settings.path.pop_back(); @@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } else { - auto & offsets = col.getOffsets(); - offsets.reserve(offsets.size() + limit); std::vector<size_t> variant_offsets; variant_offsets.reserve(variants.size()); + size_t num_non_empty_variants = 0; + ColumnVariant::Discriminator last_non_empty_discr = 0; for (size_t i = 0; i != variants.size(); ++i) - variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); - - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr == ColumnVariant::NULL_DISCRIMINATOR) - offsets.emplace_back(); - else - offsets.push_back(variant_offsets[discr]++); + if (variant_limits[i]) + { + ++num_non_empty_variants; + last_non_empty_discr = i; + } + + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); + } + + auto & discriminators_data = col.getLocalDiscriminators(); + auto & offsets = col.getOffsets(); + size_t num_new_offsets = discriminators_data.size() - offsets.size(); + offsets.reserve(offsets.size() + num_new_offsets); + /// If only NULLs were read, fill offsets with 0. + if (num_non_empty_variants == 0) + { + offsets.resize_fill(discriminators_data.size(), 0); + } + /// If only 1 variant and no NULLs were read, fill offsets with sequential offsets of this variant. + else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets) + { + size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets; + for (size_t i = 0; i != num_new_offsets; ++i) + offsets.push_back(first_offset + i); + } + /// Otherwise iterate through discriminators and fill offsets accordingly. + else + { + size_t start = offsets.size(); + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + offsets.emplace_back(); + else + offsets.push_back(variant_offsets[discr]++); + } } addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr()); @@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } settings.path.pop_back(); } +std::vector<size_t> SerializationVariant::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const +{ + auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.remaining_rows_in_granule = 0; + + /// Calculate limits for variants during discriminators deserialization.
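The offsets reconstruction above distinguishes three cases, condensed in the sketch below on plain vectors (illustrative names): only NULLs were read (offsets stay 0), one variant covers the whole range (sequential offsets), and the mixed case (per-row counting):

```cpp
#include <cstdint>
#include <vector>

constexpr uint8_t NULL_DISCRIMINATOR = 255;

// Append offsets for the newly read rows. variant_starts[d] is the row inside
// variant d where its new values begin; variant_limits[d] is how many of the
// new rows belong to variant d.
void fillOffsets(
    std::vector<size_t> & offsets,
    const std::vector<uint8_t> & discrs,   // full discriminators column
    std::vector<size_t> variant_starts,
    const std::vector<size_t> & variant_limits)
{
    size_t num_new = discrs.size() - offsets.size();

    size_t non_empty = 0;
    size_t last = 0;
    for (size_t d = 0; d < variant_limits.size(); ++d)
        if (variant_limits[d])
        {
            ++non_empty;
            last = d;
        }

    if (non_empty == 0)                                         // only NULLs were read
        offsets.resize(discrs.size(), 0);
    else if (non_empty == 1 && variant_limits[last] == num_new) // single variant: sequential
        for (size_t i = 0; i < num_new; ++i)
            offsets.push_back(variant_starts[last] + i);
    else                                                        // mixed: count per row
        for (size_t i = offsets.size(); i < discrs.size(); ++i)
            offsets.push_back(discrs[i] == NULL_DISCRIMINATOR ? 0 : variant_starts[discrs[i]]++);
}

int main()
{
    std::vector<size_t> offsets;
    fillOffsets(offsets, {1, 1, 1}, {0, 0}, {0, 3});  // hits the single-variant fast path
}
```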
+ std::vector<size_t> variant_limits(variants.size(), 0); + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limits; + + readDiscriminatorsGranuleStart(state, stream); + } + + size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule); + if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.compact_discr); + if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR) + variant_limits[state.compact_discr] += limit_in_granule; + } + else + { + SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } + } + + state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limits; +} + +void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream) +{ + UInt64 granule_size; + readVarUInt(granule_size, *stream); + state.remaining_rows_in_granule = granule_size; + UInt8 granule_format; + readBinaryLittleEndian(granule_format, *stream); + state.granule_format = static_cast<CompactDiscriminatorsGranuleFormat>(granule_format); + if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + readBinaryLittleEndian(state.compact_discr, *stream); +} + void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const { path.push_back(Substream::VariantElement); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index b6aa1534538..af89632cf81 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -2,10 +2,18 @@ #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + /// Class for serializing/deserializing column with Variant type. /// It supports both text and binary bulk serializations/deserializations. /// @@ -18,6 +26,17 @@ namespace DB /// /// During binary bulk serialization it transforms local discriminators /// to global and serializes them into a separate stream VariantDiscriminators. +/// There are 2 modes of serializing discriminators: +/// Basic mode, when all discriminators are serialized as is row by row. +/// Compact mode, when we avoid writing the same discriminators in granules when there is +/// only one variant (or only NULLs) in the granule. +/// In compact mode we serialize granules in the following format: +/// <number of rows in granule><granule format><granule data> +/// There are 2 different formats of a granule - plain and compact. +/// Plain format is used when there are different discriminators in this granule, +/// in this format all discriminators are serialized as is row by row. +/// Compact format is used when all discriminators are the same in this granule, +/// in this case only this single discriminator is serialized. /// Each variant is serialized into a separate stream with path VariantElements/VariantElement /// (VariantElements stream is needed for correct sub-columns creation).
We store and serialize /// variants in a sparse form (the size of a variant column equals to the number of its discriminator @@ -32,6 +51,25 @@ namespace DB class SerializationVariant : public ISerialization { public: + struct DiscriminatorsSerializationMode + { + enum Value + { + BASIC = 0, /// Store the whole discriminators column. + COMPACT = 1, /// Don't write discriminators in granule if all of them are the same. + }; + + static void checkMode(UInt64 mode) + { + if (mode > Value::COMPACT) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column."); + } + + explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast(mode)) { checkMode(mode); } + + Value value; + }; + using VariantSerializations = std::vector; explicit SerializationVariant( @@ -123,8 +161,44 @@ public: static std::vector getVariantsDeserializeTextOrder(const DataTypes & variant_types); private: + friend SerializationVariantElement; + friend SerializationVariantElementNullMap; + void addVariantElementToPath(SubstreamPath & path, size_t i) const; + enum CompactDiscriminatorsGranuleFormat + { + PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row. + COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. + }; + + struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState + { + explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + { + } + + DiscriminatorsSerializationMode mode; + + /// Deserialize state of currently read granule in compact mode. + CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; + size_t remaining_rows_in_granule = 0; + ColumnVariant::Discriminator compact_discr = 0; + }; + + static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + std::vector deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const; + + static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream); + bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index ec0b4019c2f..8ceab17cba4 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,7 +13,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. 
/// For example we can read several variant elements together: "select v.UInt32, v.String from table", @@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria /// substream cache correctly. ColumnPtr discriminators; ColumnPtr variant; - + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; @@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_element_state = std::make_shared(); + variant_element_state->discriminators_state = discriminators_state; addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); @@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * variant_element_state = checkAndGetState(state); - /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr; + std::optional variant_limit; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_element_state = checkAndGetState(state); variant_element_state->discriminators = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_element_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); - SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = deserializeCompactDiscriminators( + variant_element_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_state->discriminators_state, + this); + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. + /// We could read less than limit discriminators, but we will need actual number of read rows later. 
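In compact mode the element read expands granules and counts rows of the requested variant in one pass (the per-element `deserializeCompactDiscriminators` defined further below). A hedged reader sketch matching the writer sketch earlier; for brevity it assumes each call consumes whole granules, while the real code carries `remaining_rows_in_granule` in the state across calls:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

enum GranuleFormat : uint8_t { PLAIN = 0, COMPACT = 1 };

struct ByteReader
{
    const std::vector<uint8_t> & buf;
    size_t pos = 0;

    uint64_t readVarUInt()
    {
        uint64_t x = 0;
        int shift = 0;
        while (buf[pos] & 0x80)
        {
            x |= uint64_t(buf[pos++] & 0x7F) << shift;
            shift += 7;
        }
        return x | (uint64_t(buf[pos++]) << shift);
    }

    uint8_t readByte() { return buf[pos++]; }
    bool eof() const { return pos == buf.size(); }
};

// Expand granules into `discrs` and return how many of the read rows match `target`.
size_t readDiscriminators(ByteReader & in, size_t limit, uint8_t target, std::vector<uint8_t> & discrs)
{
    size_t matched = 0;
    while (limit && !in.eof())
    {
        size_t rows = in.readVarUInt();
        uint8_t format = in.readByte();
        if (format == COMPACT)
        {
            uint8_t d = in.readByte();               // one discriminator covers the granule
            discrs.insert(discrs.end(), rows, d);
            if (d == target)
                matched += rows;
        }
        else
        {
            for (size_t i = 0; i < rows; ++i)        // row-by-row payload
            {
                uint8_t d = in.readByte();
                discrs.push_back(d);
                if (d == target)
                    ++matched;
            }
        }
        limit -= rows < limit ? rows : limit;
    }
    return matched;
}

int main()
{
    std::vector<uint8_t> buf{4, COMPACT, 3};  // 4 rows, all with discriminator 3
    ByteReader in{buf};
    std::vector<uint8_t> discrs;
    assert(readDiscriminators(in, 4, /*target=*/3, discrs) == 4);
}
```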
+ size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size(); + + /// Iterate through new discriminators to calculate the limit for our variant + /// if we didn't do it during discriminators deserialization. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); - size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators; + if (!variant_limit) + { + variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + *variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. @@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( auto & nullable_column = assert_cast(*mutable_column); NullMap & null_map = nullable_column.getNullMapData(); /// If we have only our discriminator in range, fill null map with 0. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - null_map.resize_fill(null_map.size() + limit, 0); + null_map.resize_fill(null_map.size() + num_new_discriminators, 0); } /// If no our discriminator in current range, fill null map with 1. else if (variant_limit == 0) { - null_map.resize_fill(null_map.size() + limit, 1); + null_map.resize_fill(null_map.size() + num_new_discriminators, 1); } /// Otherwise we should iterate through discriminators to fill null map. else { - null_map.reserve(null_map.size() + limit); + null_map.reserve(null_map.size() + num_new_discriminators); for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) null_map.push_back(discriminators_data[i] != variant_discriminator); } @@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// If nothing to deserialize, just insert defaults. if (variant_limit == 0) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache); + nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); /// If nothing was deserialized when variant_limit > 0 @@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// In this case we should just insert default values. if (variant_element_state->variant->empty()) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } - size_t variant_offset = variant_element_state->variant->size() - variant_limit; + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. 
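Rebuilding the element column from the sparse variant then follows the branches described above: a contiguous copy when every new row belongs to our variant, otherwise a per-row choice between the next variant value and a default. A simplified sketch on `std::vector<std::string>` (illustrative names, not the IColumn API):

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Append discrs.size() rows to `result`: values come from the sparse `variant`
// column starting at `variant_offset`; rows with another discriminator get a
// default value. The fast path mirrors an insertRangeFrom on a full match
// (it assumes the new variant values sit at the tail of `variant`).
void expandVariant(
    std::vector<std::string> & result,
    const std::vector<std::string> & variant,
    const std::vector<uint8_t> & discrs,   // discriminators of the new rows only
    uint8_t our_discr,
    size_t variant_offset,
    size_t variant_limit)
{
    if (variant_limit == discrs.size())    // every new row is ours: one contiguous copy
    {
        result.insert(result.end(), variant.begin() + variant_offset, variant.end());
        return;
    }
    size_t next = variant_offset;
    for (uint8_t d : discrs)
        result.push_back(d == our_discr ? variant[next++] : std::string{});
}

int main()
{
    std::vector<std::string> result;
    expandVariant(result, {"x", "y"}, {0, 255, 0}, /*our_discr=*/0, 0, 2);
    assert(result.size() == 3 && result[0] == "x" && result[1].empty() && result[2] == "y");
}
```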
- if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit); + mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit); } /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator. else @@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } } +size_t SerializationVariantElement::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + DB::ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization) +{ + auto * discriminators_state = checkAndGetState(discriminators_state_, serialization); + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + discriminators_state->remaining_rows_in_granule = 0; + + /// Calculate our variant limit during discriminators deserialization. + size_t variant_limit = 0; + while (limit) + { + /// If we read all rows from current granule, start reading the next one. + if (discriminators_state->remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limit; + + SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream); + } + + size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule); + if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr); + if (discriminators_state->compact_discr == variant_discriminator) + variant_limit += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } + + discriminators_state->remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limit; +} + void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const { path.push_back(Substream::VariantElements); @@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_) + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_) : local_discriminators(local_discriminators_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) + , make_nullable(make_nullable_) { } DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const { - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? 
makeNullableOrLowCardinalityNullableSafe(prev) : prev; } SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const @@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Case when original Variant column contained only one non-empty variant and no NULLs. /// In this case just use this variant. if (prev->size() == local_discriminators->size()) - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; /// If this variant is empty, fill result column with default values. if (prev->empty()) { - auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty(); + auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty(); res->insertManyDefaults(local_discriminators->size()); return res; } @@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); - /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), + /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). - if (prev->lowCardinality()) + if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); res_column->expand(null_map, /*inverted = */ true); - if (res_column->canBeInsideNullable()) + if (make_nullable && prev->canBeInsideNullable()) { auto null_map_col = ColumnUInt8::create(); null_map_col->getData() = std::move(null_map); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 0ce0a72e250..69101aea0f5 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -9,6 +9,7 @@ namespace DB { class SerializationVariant; +class SerializationVariantElementNullMap; /// Serialization for Variant element when we read it as a subcolumn. 
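/// For example, for a column "v Variant(UInt64, String)" the subcolumn "v.UInt64" is read with
/// this serialization; rows whose discriminator is not the UInt64 one are filled with NULL
/// (or with a default value when the result is not made Nullable).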
class SerializationVariantElement final : public SerializationWrapper @@ -66,12 +67,14 @@ public: const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; + bool make_nullable; VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_); + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; @@ -79,6 +82,18 @@ public: }; private: friend SerializationVariant; + friend SerializationVariantElementNullMap; + + struct DeserializeBinaryBulkStateVariantElement; + + static size_t deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization); void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp new file mode 100644 index 00000000000..f30da4fecf9 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState +{ + /// During deserialization discriminators streams can be shared. + /// For example we can read several variant elements together: "select v.UInt32, v.String.null from table", + /// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table". + /// To read the same column from the same stream more than once we use substream cache, + /// but this cache stores the whole column, not only the current range. + /// During deserialization of variant elements or their subcolumns discriminators column is not stored + /// in the result column, so we need to store them inside deserialization state, so we can use + /// substream cache correctly. + ColumnPtr discriminators; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; +}; + +void SerializationVariantElementNullMap::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData &) const +{ + /// We will need stream for discriminators during deserialization. 
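+    /// Only the discriminators substream is enumerated: e.g. for "v.UInt64.null" the null map
+    /// is fully determined by the discriminators, so no variant data stream is needed at all.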
+ settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix( + const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + + auto variant_element_null_map_state = std::make_shared(); + variant_element_null_map_state->discriminators_state = std::move(discriminators_state); + state = std::move(variant_element_null_map_state); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams( + const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + /// Deserialize discriminators from Variant column. + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr; + std::optional variant_limit; + if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + variant_element_null_map_state->discriminators = cached_discriminators; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState( + variant_element_null_map_state->discriminators_state); + + /// If we started to read a new column, reinitialize discriminators column in deserialization state. + if (!variant_element_null_map_state->discriminators || result_column->empty()) + variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); + + /// Deserialize discriminators according to serialization mode. 
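+    /// A rough sketch of the two modes (see SerializationVariant for the exact format):
+    /// BASIC stores one discriminator per row as a plain stream of numbers, while COMPACT
+    /// stores discriminators granule by granule, and a granule with a single repeated
+    /// discriminator is encoded as just its row count and that one discriminator, which is
+    /// what deserializeCompactDiscriminators below decodes.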
+        if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
+            SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(
+                *variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
+        else
+            variant_limit = SerializationVariantElement::deserializeCompactDiscriminators(
+                variant_element_null_map_state->discriminators,
+                variant_discriminator,
+                limit,
+                discriminators_stream,
+                settings.continuous_reading,
+                variant_element_null_map_state->discriminators_state,
+                this);
+
+        addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators);
+    }
+    else
+    {
+        /// There is no such stream or cached data, which means that there is no Variant column in this part (it could happen after ALTER TABLE ADD COLUMN).
+        /// In such cases columns are filled with default values, but for a null-map column the default value should be 1, not 0. Fill the column with 1 here instead.
+        MutableColumnPtr mutable_column = result_column->assumeMutable();
+        auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
+        data.resize_fill(data.size() + limit, 1);
+        settings.path.pop_back();
+        return;
+    }
+    settings.path.pop_back();
+
+    MutableColumnPtr mutable_column = result_column->assumeMutable();
+    auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
+    /// Check if there is no such variant in the read range.
+    if (variant_limit && *variant_limit == 0)
+    {
+        data.resize_fill(data.size() + limit, 1);
+    }
+    /// Check if there is only our variant in the read range.
+    else if (variant_limit && *variant_limit == limit)
+    {
+        data.resize_fill(data.size() + limit, 0);
+    }
+    /// Iterate through new discriminators to calculate the null map of our variant.
+    else
+    {
+        const auto & discriminators_data
+            = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_null_map_state->discriminators).getData();
+        size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit;
+        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
+            data.push_back(discriminators_data[i] != variant_discriminator);
+    }
+}
+
+SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator(
+    const ColumnPtr & local_discriminators_,
+    const String & variant_element_name_,
+    ColumnVariant::Discriminator global_variant_discriminator_,
+    ColumnVariant::Discriminator local_variant_discriminator_)
+    : local_discriminators(local_discriminators_)
+    , variant_element_name(variant_element_name_)
+    , global_variant_discriminator(global_variant_discriminator_)
+    , local_variant_discriminator(local_variant_discriminator_)
+{
+}
+
+DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const
+{
+    return std::make_shared<DataTypeUInt8>();
+}
+
+SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const
+{
+    return std::make_shared<SerializationVariantElementNullMap>(variant_element_name, global_variant_discriminator);
+}
+
+ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const
+{
+    /// Iterate through discriminators and create null-map for our variant.
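+    /// E.g. local discriminators [0, 1, 0, 2] with local_variant_discriminator = 0 produce
+    /// the null map [0, 1, 0, 1], where 1 marks rows that are NULL for this variant.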
+ auto null_map_col = ColumnUInt8::create(); + auto & null_map_data = null_map_col->getData(); + null_map_data.reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_data.push_back(local_discr != local_variant_discriminator); + + return null_map_col; +} + + +} diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.h b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h new file mode 100644 index 00000000000..cd81b445189 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class SerializationVariant; +class SerializationVariantElement; + +/// Serialization for Variant element null map when we read it as a subcolumn. +/// For example, variant.UInt64.null. +/// It requires separate serialization because there is no actual Nullable column +/// and we should construct null map from variant discriminators. +/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement, +/// but differs in that there is no need to read the actual data of the variant, only discriminators. +class SerializationVariantElementNullMap final : public SimpleTextSerialization +{ +public: + SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_) + : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, 
const FormatSettings &, bool) const override { throwNoSerialization(); } + + struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr local_discriminators; + const String variant_element_name; + const ColumnVariant::Discriminator global_variant_discriminator; + const ColumnVariant::Discriminator local_variant_discriminator; + + VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_); + + DataTypePtr create(const DataTypePtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + }; +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn"); + } + + friend SerializationVariant; + friend SerializationVariantElement; + + /// To be able to deserialize Variant element null map as a subcolumn + /// we need variant element type name and global discriminator. + String variant_element_name; + ColumnVariant::Discriminator variant_discriminator; + +}; + +} diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index de7ee5a74f4..4aa7f6ff564 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -27,7 +27,8 @@ DiskPtr DiskFactory::create( ContextPtr context, const DisksMap & map, bool attach, - bool custom_disk) const + bool custom_disk, + const std::unordered_set & skip_types) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -38,6 +39,11 @@ DiskPtr DiskFactory::create( "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type); } + if (skip_types.contains(found->first)) + { + return nullptr; + } + const auto & disk_creator = found->second; return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index d03ffa6a40f..044ce81dbae 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -42,7 +42,8 @@ public: ContextPtr context, const DisksMap & map, bool attach = false, - bool custom_disk = false) const; + bool custom_disk = false, + const std::unordered_set & skip_types = {}) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index a9260a249dd..f45d12618bf 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { @@ -27,7 +26,8 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) +void DiskSelector::initialize( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -36,6 +36,8 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, constexpr auto default_disk_name = "default"; bool has_default_disk = false; + constexpr auto local_disk_name = "local"; + bool has_local_disk = false; for (const auto & disk_name : keys) { if (!std::all_of(disk_name.begin(), disk_name.end(), 
isWordCharASCII)) @@ -44,21 +46,31 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (disk_name == default_disk_name) has_default_disk = true; + if (disk_name == local_disk_name) + has_local_disk = true; + const auto disk_config_prefix = config_prefix + "." + disk_name; if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) continue; - - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); + auto created_disk + = factory.create(disk_name, config, disk_config_prefix, context, disks, /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk.get()) + { + disks.emplace(disk_name, std::move(created_disk)); + } } if (!has_default_disk) { disks.emplace( - default_disk_name, - std::make_shared( - default_disk_name, context->getPath(), 0, context, config, config_prefix)); + default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } + if (!has_local_disk && (context->getApplicationType() == Context::ApplicationType::DISKS)) + { + throw_away_local_on_update = true; + disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); + } is_initialized = true; } @@ -76,6 +88,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( std::shared_ptr result = std::make_shared(*this); constexpr auto default_disk_name = "default"; + constexpr auto local_disk_name = "local"; DisksMap old_disks_minus_new_disks(result->getDisksMap()); for (const auto & disk_name : keys) @@ -86,7 +99,12 @@ DiskSelectorPtr DiskSelector::updateFromConfig( auto disk_config_prefix = config_prefix + "." + disk_name; if (!result->getDisksMap().contains(disk_name)) { - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); + auto created_disk = factory.create( + disk_name, config, disk_config_prefix, context, result->getDisksMap(), /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk) + { + result->addToDiskMap(disk_name, created_disk); + } } else { @@ -99,6 +117,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); + if (throw_away_local_on_update) + { + old_disks_minus_new_disks.erase(local_disk_name); + } if (!old_disks_minus_new_disks.empty()) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 6669b428158..49a1be5cf50 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,7 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - DiskSelector() = default; + explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -48,6 +48,10 @@ private: bool is_initialized = false; void assertInitialized() const; + + const std::unordered_set skip_types; + + bool throw_away_local_on_update = false; }; } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index d1324e22978..60fa2997c50 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -39,7 +39,7 @@ struct WriteBufferFromAzureBlobStorage::PartData size_t data_size = 0; }; -BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings) +BufferAllocationPolicyPtr 
createBufferAllocationPolicy(const AzureBlobStorage::RequestSettings & settings) { BufferAllocationPolicy::Settings allocation_settings; allocation_settings.strict_size = settings.strict_upload_part_size; @@ -57,7 +57,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 10fe871a727..3ee497c4e44 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -35,7 +35,7 @@ public: const String & blob_path_, size_t buf_size_, const WriteSettings & write_settings_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp deleted file mode 100644 index 1a5388349f8..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ /dev/null @@ -1,272 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace Azure::Storage::Blobs; - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - - -void validateStorageAccountUrl(const String & storage_account_url) -{ - const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; - static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); - - if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); -} - - -void validateContainerName(const String & container_name) -{ - auto len = container_name.length(); - if (len < 3 || len > 64) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); - - const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; - static const RE2 container_name_pattern(container_name_pattern_str); - - if (!re2::RE2::FullMatch(container_name, container_name_pattern)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", - container_name_pattern_str, container_name); -} - - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - String storage_url; - String account_name; - String container_name; - String prefix; - if (config.has(config_prefix + ".endpoint")) - { - String endpoint = config.getString(config_prefix + ".endpoint"); - - /// For some authentication methods account name is not present in the endpoint - /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) - bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); - - size_t pos = endpoint.find("//"); - if (pos == 
std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); - - if (endpoint_contains_account_name) - { - size_t acc_pos_begin = endpoint.find('/', pos+2); - if (acc_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); - - storage_url = endpoint.substr(0,acc_pos_begin); - size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1); - - if (acc_pos_end == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1); - - size_t cont_pos_end = endpoint.find('/', acc_pos_end+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(acc_pos_end+1); - } - } - else - { - size_t cont_pos_begin = endpoint.find('/', pos+2); - - if (cont_pos_begin == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); - - storage_url = endpoint.substr(0,cont_pos_begin); - size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1); - - if (cont_pos_end != std::string::npos) - { - container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1); - prefix = endpoint.substr(cont_pos_end+1); - } - else - { - container_name = endpoint.substr(cont_pos_begin+1); - } - } - } - else if (config.has(config_prefix + ".connection_string")) - { - storage_url = config.getString(config_prefix + ".connection_string"); - container_name = config.getString(config_prefix + ".container_name"); - } - else if (config.has(config_prefix + ".storage_account_url")) - { - storage_url = config.getString(config_prefix + ".storage_account_url"); - validateStorageAccountUrl(storage_url); - container_name = config.getString(config_prefix + ".container_name"); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); - - if (!container_name.empty()) - validateContainerName(container_name); - std::optional container_already_exists {}; - if (config.has(config_prefix + ".container_already_exists")) - container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; - return {storage_url, account_name, container_name, prefix, container_already_exists}; -} - - -template -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); -} - -template<> -std::unique_ptr getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) -{ - return std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); -} - -template -std::unique_ptr getAzureBlobStorageClientWithAuth( - const String & url, - const String & container_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - const Azure::Storage::Blobs::BlobClientOptions & client_options) -{ - std::string connection_str; - if 
(config.has(config_prefix + ".connection_string")) - connection_str = config.getString(config_prefix + ".connection_string"); - - if (!connection_str.empty()) - return getClientWithConnectionString(connection_str, container_name, client_options); - - if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) - { - auto storage_shared_key_credential = std::make_shared( - config.getString(config_prefix + ".account_name"), - config.getString(config_prefix + ".account_key") - ); - return std::make_unique(url, storage_shared_key_credential, client_options); - } - - if (config.getBool(config_prefix + ".use_workload_identity", false)) - { - auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential, client_options); - } - - auto managed_identity_credential = std::make_shared(); - return std::make_unique(url, managed_identity_credential, client_options); -} - -Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - Azure::Core::Http::Policies::RetryOptions retry_options; - retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); - retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); - - using CurlOptions = Azure::Core::Http::CurlTransportOptions; - CurlOptions curl_options; - curl_options.NoSignal = true; - - if (config.has(config_prefix + ".curl_ip_resolve")) - { - auto value = config.getString(config_prefix + ".curl_ip_resolve"); - if (value == "ipv4") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; - else if (value == "ipv6") - curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); - } - - Azure::Storage::Blobs::BlobClientOptions client_options; - client_options.Retry = retry_options; - client_options.Transport.Transport = std::make_shared(curl_options); - - client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}; - - return client_options; -} - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - auto container_name = endpoint.container_name; - auto final_url = endpoint.getEndpoint(); - auto client_options = getAzureBlobClientOptions(config, config_prefix); - - if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - - auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); - - try - { - return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); - } - catch (const Azure::Storage::StorageException & e) - { - /// If container_already_exists is not set (in config), ignore already exists error. 
- /// (Conflict - The specified container already exists) - if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - throw; - } -} - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) -{ - std::unique_ptr settings = std::make_unique(); - settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", context->getSettings().azure_max_single_part_upload_size); - settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); - settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); - settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); - settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); - settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); - settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); - settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); - settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); - settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", 50000); - settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); - settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); - settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); - settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); - settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); - - return settings; -} - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h deleted file mode 100644 index e4775a053c1..00000000000 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -namespace DB -{ - -struct AzureBlobStorageEndpoint -{ - const String storage_account_url; - const String account_name; - const String container_name; - const String prefix; - const std::optional container_already_exists; - - String getEndpoint() - { - String url = storage_account_url; - if (url.ends_with('/')) - url.pop_back(); - - if (!account_name.empty()) - url += "/" + account_name; - - if (!container_name.empty()) - url += "/" + container_name; - - 
if (!prefix.empty()) - url += "/" + prefix; - - return url; - } - - String getEndpointWithoutContainer() - { - String url = storage_account_url; - - if (!account_name.empty()) - url += "/" + account_name; - - return url; - } -}; - -std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - -} - -#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp new file mode 100644 index 00000000000..d9dfedadd48 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.cpp @@ -0,0 +1,351 @@ +#include + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace AzureBlobStorage +{ + +static void validateStorageAccountUrl(const String & storage_account_url) +{ + const auto * storage_account_url_pattern_str = R"(http(()|s)://[a-z0-9-.:]+(()|/)[a-z0-9]*(()|/))"; + static const RE2 storage_account_url_pattern(storage_account_url_pattern_str); + + if (!re2::RE2::FullMatch(storage_account_url, storage_account_url_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Blob Storage URL is not valid, should follow the format: {}, got: {}", storage_account_url_pattern_str, storage_account_url); +} + +static void validateContainerName(const String & container_name) +{ + auto len = container_name.length(); + if (len < 3 || len > 64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); + + const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; + static const RE2 container_name_pattern(container_name_pattern_str); + + if (!re2::RE2::FullMatch(container_name, container_name_pattern)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", + container_name_pattern_str, container_name); +} + +static bool isConnectionString(const std::string & candidate) +{ + return !candidate.starts_with("http"); +} + +String ConnectionParams::getConnectionURL() const +{ + if (std::holds_alternative(auth_method)) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(endpoint.storage_account_url); + return parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl(); + } + + return endpoint.storage_account_url; +} + +std::unique_ptr ConnectionParams::createForService() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ServiceClient::CreateFromConnectionString(auth.toUnderType(), client_options)); + else + return std::make_unique(endpoint.getEndpointWithoutContainer(), auth, client_options); + }, auth_method); +} + +std::unique_ptr ConnectionParams::createForContainer() const +{ + return std::visit([this](const T & auth) + { + if constexpr (std::is_same_v) + return std::make_unique(ContainerClient::CreateFromConnectionString(auth.toUnderType(), endpoint.container_name, client_options)); + else + return 
std::make_unique(endpoint.getEndpoint(), auth, client_options); + }, auth_method); +} + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + String storage_url; + String account_name; + String container_name; + String prefix; + + auto get_container_name = [&] + { + if (config.has(config_prefix + ".container_name")) + return config.getString(config_prefix + ".container_name"); + + if (config.has(config_prefix + ".container")) + return config.getString(config_prefix + ".container"); + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `container` or `container_name` parameter in config"); + }; + + if (config.has(config_prefix + ".endpoint")) + { + String endpoint = config.getString(config_prefix + ".endpoint"); + + /// For some authentication methods account name is not present in the endpoint + /// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true) + bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true); + + size_t pos = endpoint.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected '//' in endpoint"); + + if (endpoint_contains_account_name) + { + size_t acc_pos_begin = endpoint.find('/', pos + 2); + if (acc_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected account_name in endpoint"); + + storage_url = endpoint.substr(0, acc_pos_begin); + size_t acc_pos_end = endpoint.find('/', acc_pos_begin + 1); + + if (acc_pos_end == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + account_name = endpoint.substr(acc_pos_begin + 1, acc_pos_end - acc_pos_begin - 1); + + size_t cont_pos_end = endpoint.find('/', acc_pos_end + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(acc_pos_end + 1, cont_pos_end - acc_pos_end - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(acc_pos_end + 1); + } + } + else + { + size_t cont_pos_begin = endpoint.find('/', pos + 2); + + if (cont_pos_begin == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint"); + + storage_url = endpoint.substr(0, cont_pos_begin); + size_t cont_pos_end = endpoint.find('/', cont_pos_begin + 1); + + if (cont_pos_end != std::string::npos) + { + container_name = endpoint.substr(cont_pos_begin + 1,cont_pos_end - cont_pos_begin - 1); + prefix = endpoint.substr(cont_pos_end + 1); + } + else + { + container_name = endpoint.substr(cont_pos_begin + 1); + } + } + } + else if (config.has(config_prefix + ".connection_string")) + { + storage_url = config.getString(config_prefix + ".connection_string"); + container_name = get_container_name(); + } + else if (config.has(config_prefix + ".storage_account_url")) + { + storage_url = config.getString(config_prefix + ".storage_account_url"); + validateStorageAccountUrl(storage_url); + container_name = get_container_name(); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected either `storage_account_url` or `connection_string` or `endpoint` in config"); + + if (!container_name.empty()) + validateContainerName(container_name); + + std::optional container_already_exists {}; + if (config.has(config_prefix + ".container_already_exists")) + container_already_exists = {config.getBool(config_prefix + ".container_already_exists")}; + + return {storage_url, account_name, 
container_name, prefix, "", container_already_exists}; +} + +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method) +{ + endpoint.container_name = container_name; + + if (isConnectionString(url)) + { + endpoint.storage_account_url = url; + auth_method = ConnectionString{url}; + return; + } + + auto pos = url.find('?'); + + /// If conneciton_url does not have '?', then its not SAS + if (pos == std::string::npos) + { + endpoint.storage_account_url = url; + auth_method = std::make_shared(); + } + else + { + endpoint.storage_account_url = url.substr(0, pos); + endpoint.sas_auth = url.substr(pos + 1); + auth_method = std::make_shared(); + } +} + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly) +{ + if (params.endpoint.container_already_exists.value_or(false) || readonly) + return params.createForContainer(); + + try + { + auto service_client = params.createForService(); + return std::make_unique(service_client->CreateBlobContainer(params.endpoint.container_name).Value); + } + catch (const Azure::Storage::StorageException & e) + { + /// If container_already_exists is not set (in config), ignore already exists error. + /// (Conflict - The specified container already exists) + if (!params.endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + return params.createForContainer(); + throw; + } +} + +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) + { + return std::make_shared( + config.getString(config_prefix + ".account_name"), + config.getString(config_prefix + ".account_key") + ); + } + + if (config.has(config_prefix + ".connection_string")) + return ConnectionString{config.getString(config_prefix + ".connection_string")}; + + if (config.getBool(config_prefix + ".use_workload_identity", false)) + return std::make_shared(); + + return std::make_shared(); +} + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = static_cast(settings.sdk_max_retries); + retry_options.RetryDelay = std::chrono::milliseconds(settings.sdk_retry_initial_backoff_ms); + retry_options.MaxRetryDelay = std::chrono::milliseconds(settings.sdk_retry_max_backoff_ms); + + Azure::Core::Http::CurlTransportOptions curl_options; + curl_options.NoSignal = true; + curl_options.IPResolve = settings.curl_ip_resolve; + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + client_options.ClickhouseOptions = Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=for_disk}; + + return client_options; +} + +std::unique_ptr getRequestSettings(const Settings & query_settings) +{ + auto settings = std::make_unique(); + + settings->max_single_part_upload_size = query_settings.azure_max_single_part_upload_size; + settings->max_single_read_retries = query_settings.azure_max_single_read_retries; + settings->max_single_download_retries = query_settings.azure_max_single_read_retries; + settings->list_object_keys_size = query_settings.azure_list_object_keys_size; + settings->min_upload_part_size = query_settings.azure_min_upload_part_size; + settings->max_upload_part_size = 
query_settings.azure_max_upload_part_size; + settings->max_single_part_copy_size = query_settings.azure_max_single_part_copy_size; + settings->max_blocks_in_multipart_upload = query_settings.azure_max_blocks_in_multipart_upload; + settings->max_unexpected_write_error_retries = query_settings.azure_max_unexpected_write_error_retries; + settings->max_inflight_parts_for_one_file = query_settings.azure_max_inflight_parts_for_one_file; + settings->strict_upload_part_size = query_settings.azure_strict_upload_part_size; + settings->upload_part_size_multiply_factor = query_settings.azure_upload_part_size_multiply_factor; + settings->upload_part_size_multiply_parts_count_threshold = query_settings.azure_upload_part_size_multiply_parts_count_threshold; + settings->sdk_max_retries = query_settings.azure_sdk_max_retries; + settings->sdk_retry_initial_backoff_ms = query_settings.azure_sdk_retry_initial_backoff_ms; + settings->sdk_retry_max_backoff_ms = query_settings.azure_sdk_retry_max_backoff_ms; + + return settings; +} + +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy) +{ + auto settings = getRequestSettings(query_settings); + settings->use_native_copy = use_native_copy; + return settings; +} + +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +{ + auto settings = std::make_unique(); + const auto & settings_ref = context->getSettingsRef(); + + settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); + settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); + + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", settings_ref.azure_max_single_part_upload_size); + settings->max_single_read_retries = config.getUInt64(config_prefix + ".max_single_read_retries", settings_ref.azure_max_single_read_retries); + settings->max_single_download_retries = config.getUInt64(config_prefix + ".max_single_download_retries", settings_ref.azure_max_single_read_retries); + settings->list_object_keys_size = config.getUInt64(config_prefix + ".list_object_keys_size", settings_ref.azure_list_object_keys_size); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", settings_ref.azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", settings_ref.azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", settings_ref.azure_max_single_part_copy_size); + settings->max_blocks_in_multipart_upload = config.getUInt64(config_prefix + ".max_blocks_in_multipart_upload", settings_ref.azure_max_blocks_in_multipart_upload); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", settings_ref.azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", settings_ref.azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", settings_ref.azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", settings_ref.azure_upload_part_size_multiply_factor); + 
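+    /// For illustration (assuming the buffer allocation policy described in
+    /// WriteBufferFromAzureBlobStorage): with strict_upload_part_size == 0, upload parts
+    /// start at min_upload_part_size and grow by upload_part_size_multiply_factor every
+    /// upload_part_size_multiply_parts_count_threshold parts, capped by max_upload_part_size.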
settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", settings_ref.azure_upload_part_size_multiply_parts_count_threshold); + + settings->sdk_max_retries = config.getUInt64(config_prefix + ".max_tries", settings_ref.azure_sdk_max_retries); + settings->sdk_retry_initial_backoff_ms = config.getUInt64(config_prefix + ".retry_initial_backoff_ms", settings_ref.azure_sdk_retry_initial_backoff_ms); + settings->sdk_retry_max_backoff_ms = config.getUInt64(config_prefix + ".retry_max_backoff_ms", settings_ref.azure_sdk_retry_max_backoff_ms); + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + settings->curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + } + + return settings; +} + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h new file mode 100644 index 00000000000..19ba48ea225 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h @@ -0,0 +1,138 @@ +#pragma once +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace AzureBlobStorage +{ + +using ServiceClient = Azure::Storage::Blobs::BlobServiceClient; +using ContainerClient = Azure::Storage::Blobs::BlobContainerClient; +using BlobClient = Azure::Storage::Blobs::BlobClient; +using BlobClientOptions = Azure::Storage::Blobs::BlobClientOptions; + +struct RequestSettings +{ + RequestSettings() = default; + + size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset + size_t min_bytes_for_seek = 1024 * 1024; + size_t max_single_read_retries = 3; + size_t max_single_download_retries = 3; + size_t list_object_keys_size = 1000; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t max_single_part_copy_size = 256 * 1024 * 1024; + size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; + size_t max_blocks_in_multipart_upload = 50000; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t sdk_max_retries = 10; + size_t sdk_retry_initial_backoff_ms = 10; + size_t sdk_retry_max_backoff_ms = 1000; + bool use_native_copy = false; + + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions::CurlOptIPResolve curl_ip_resolve = CurlOptions::CURL_IPRESOLVE_WHATEVER; +}; + +struct Endpoint +{ + String storage_account_url; + String account_name; + String container_name; + String prefix; + String sas_auth; + std::optional container_already_exists; + + String getEndpoint() const + { + String url = storage_account_url; + if (url.ends_with('/')) + url.pop_back(); + + if (!account_name.empty()) + url += "/" + account_name; + + if (!container_name.empty()) + url += "/" + container_name; 
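+        /// Illustrative values: storage_account_url = "https://account.blob.core.windows.net",
+        /// container_name = "data", prefix = "store" compose to
+        /// "https://account.blob.core.windows.net/data/store", plus "?<sas_auth>" if set.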
+ + if (!prefix.empty()) + url += "/" + prefix; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } + + String getEndpointWithoutContainer() const + { + String url = storage_account_url; + + if (!account_name.empty()) + url += "/" + account_name; + + if (!sas_auth.empty()) + url += "?" + sas_auth; + + return url; + } +}; + +using ConnectionString = StrongTypedef; + +using AuthMethod = std::variant< + ConnectionString, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr>; + +struct ConnectionParams +{ + Endpoint endpoint; + AuthMethod auth_method; + BlobClientOptions client_options; + + String getContainer() const { return endpoint.container_name; } + String getConnectionURL() const; + + std::unique_ptr createForService() const; + std::unique_ptr createForContainer() const; +}; + +Endpoint processEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +void processURL(const String & url, const String & container_name, Endpoint & endpoint, AuthMethod & auth_method); + +std::unique_ptr getContainerClient(const ConnectionParams & params, bool readonly); + +BlobClientOptions getClientOptions(const RequestSettings & settings, bool for_disk); +AuthMethod getAuthMethod(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + +std::unique_ptr getRequestSettings(const Settings & query_settings); +std::unique_ptr getRequestSettingsForBackup(const Settings & query_settings, bool use_native_copy); +std::unique_ptr getRequestSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); + +} + +} + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 0ebe885a3e7..bc16955143b 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ private: AzureObjectStorage::AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_) @@ -397,24 +397,49 @@ void AzureObjectStorage::copyObject( /// NOLINT } void AzureObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context, const ApplyNewSettingsOptions &) + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context, + const ApplyNewSettingsOptions & options) { - auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); settings.set(std::move(new_settings)); - /// We don't update client + + if (!options.allow_client_change) + return; + + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*settings.get(), is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + client.set(std::move(new_client)); } -std::unique_ptr 
AzureObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +std::unique_ptr AzureObjectStorage::cloneObjectStorage( + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { - return std::make_unique( - name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace, - description - ); + auto new_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + bool is_client_for_disk = client.get()->GetClickhouseOptions().IsClientForDisk; + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*new_settings, is_client_for_disk), + }; + + auto new_client = AzureBlobStorage::getContainerClient(params, /*readonly=*/ true); + return std::make_unique(name, std::move(new_client), std::move(new_settings), new_namespace, params.endpoint.getEndpointWithoutContainer()); } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c342929d656..2c7ce5e18dc 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace Poco { @@ -16,71 +18,15 @@ class Logger; namespace DB { -struct AzureObjectStorageSettings -{ - AzureObjectStorageSettings( - uint64_t max_single_part_upload_size_, - uint64_t min_bytes_for_seek_, - int max_single_read_retries_, - int max_single_download_retries_, - int list_object_keys_size_, - size_t min_upload_part_size_, - size_t max_upload_part_size_, - size_t max_single_part_copy_size_, - bool use_native_copy_, - size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_, - size_t strict_upload_part_size_, - size_t upload_part_size_multiply_factor_, - size_t upload_part_size_multiply_parts_count_threshold_) - : max_single_part_upload_size(max_single_part_upload_size_) - , min_bytes_for_seek(min_bytes_for_seek_) - , max_single_read_retries(max_single_read_retries_) - , max_single_download_retries(max_single_download_retries_) - , list_object_keys_size(list_object_keys_size_) - , min_upload_part_size(min_upload_part_size_) - , max_upload_part_size(max_upload_part_size_) - , max_single_part_copy_size(max_single_part_copy_size_) - , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) - , strict_upload_part_size(strict_upload_part_size_) - , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) - , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) - { - } - - AzureObjectStorageSettings() = default; - - size_t max_single_part_upload_size = 100 * 1024 * 1024; /// NOTE: on 32-bit machines it will be at most 4GB, but size_t is also used in BufferBase for offset - uint64_t min_bytes_for_seek = 1024 * 1024; - size_t max_single_read_retries = 3; - size_t max_single_download_retries = 3; - int list_object_keys_size = 1000; - size_t min_upload_part_size = 16 * 
1024 * 1024; - size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; - size_t max_single_part_copy_size = 256 * 1024 * 1024; - bool use_native_copy = false; - size_t max_unexpected_write_error_retries = 4; - size_t max_inflight_parts_for_one_file = 20; - size_t max_blocks_in_multipart_upload = 50000; - size_t strict_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 2; - size_t upload_part_size_multiply_parts_count_threshold = 500; -}; - -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - class AzureObjectStorage : public IObjectStorage { public: - - using SettingsPtr = std::unique_ptr; + using ClientPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; AzureObjectStorage( const String & name_, - AzureClientPtr && client_, + ClientPtr && client_, SettingsPtr && settings_, const String & object_namespace_, const String & description_); @@ -159,12 +105,8 @@ public: bool isRemote() const override { return true; } - std::shared_ptr getSettings() { return settings.get(); } - - std::shared_ptr getAzureBlobStorageClient() override - { - return client.get(); - } + std::shared_ptr getSettings() const { return settings.get(); } + std::shared_ptr getAzureBlobStorageClient() const override { return client.get(); } bool supportParallelWrite() const override { return true; } @@ -174,8 +116,8 @@ private: const String name; /// client used to access the files in the Blob Storage cloud - MultiVersion client; - MultiVersion settings; + MultiVersion client; + MultiVersion settings; const String object_namespace; /// container + prefix /// We use source url without container and prefix as description, because in Azure there are no limitations for operations between different containers. diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 727dbeed853..93ef2659cbb 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -127,7 +127,7 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } #if USE_AZURE_BLOB_STORAGE - std::shared_ptr getAzureBlobStorageClient() override + std::shared_ptr getAzureBlobStorageClient() const override { return object_storage->getAzureBlobStorageClient(); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 6410a9a7a73..ceea4d5a2bb 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -261,7 +261,7 @@ public: virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } #if USE_AZURE_BLOB_STORAGE - virtual std::shared_ptr getAzureBlobStorageClient() + virtual std::shared_ptr getAzureBlobStorageClient() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for AzureBlobStorage"); } diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 8997f40b9a2..a042f4bd8b9 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 5698d2ad588..d9535358daf 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ 
-13,7 +13,7 @@ #endif #if USE_AZURE_BLOB_STORAGE #include -#include +#include #endif #include #include @@ -317,15 +317,22 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) const ContextPtr & context, bool /* skip_access_check */) -> ObjectStoragePtr { - AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + auto azure_settings = AzureBlobStorage::getRequestSettings(config, config_prefix, context); + + AzureBlobStorage::ConnectionParams params + { + .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix), + .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix), + .client_options = AzureBlobStorage::getClientOptions(*azure_settings, /*for_disk=*/ true), + }; return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, - getAzureBlobContainerClient(config, config_prefix), - getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix, - endpoint.getEndpointWithoutContainer()); + AzureBlobStorage::getContainerClient(params, /*readonly=*/ false), std::move(azure_settings), + params.endpoint.prefix.empty() ? params.endpoint.container_name : params.endpoint.container_name + "/" + params.endpoint.prefix, + params.endpoint.getEndpointWithoutContainer()); }; + factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); } diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 0f3f8be96a7..ab10d402df4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -3,14 +3,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include namespace DB { @@ -218,10 +218,7 @@ struct UnbinImpl static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; - static void decode(const char * pos, const char * end, char *& out) - { - binStringDecode(pos, end, out); - } + static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); } }; /// Encode number or string to string with binary or hexadecimal representation @@ -651,7 +648,15 @@ public: size_t size = in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + + size_t max_out_len = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + const size_t len = in_offsets[i] - (i == 0 ? 
0 : in_offsets[i - 1]) + - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; + max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1; + } + out_vec.resize(max_out_len); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -661,6 +666,7 @@ public: { size_t new_offset = in_offsets[i]; + /// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -668,6 +674,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; @@ -680,11 +689,11 @@ public: ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); - size_t n = col_fix_string->getN(); + const size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -694,6 +703,7 @@ public: { size_t new_offset = prev_offset + n; + /// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); out_offsets[i] = pos - begin; @@ -701,6 +711,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fa9b3dc92dd..0782f109187 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -28,6 +31,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } using NullMap = PaddedPODArray; @@ -424,31 +428,21 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - if constexpr (std::is_same_v) + if (auto res = executeMap(arguments, result_type)) + return res; + + if (auto res = executeArrayLowCardinality(arguments)) + return res; + + auto new_arguments = arguments; + + for (auto & argument : new_arguments) { - if (isMap(arguments[0].type)) - { - auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); - - const auto & map_column = assert_cast(*non_const_map_column); - const auto & map_array_column = map_column.getNestedColumn(); - auto offsets = map_array_column.getOffsetsPtr(); - auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(keys, offsets); - - const auto & type_map = assert_cast(*arguments[0].type); - 
auto array_type = std::make_shared(type_map.getKeyType()); - - auto arguments_copy = arguments; - arguments_copy[0].column = std::move(array_column); - arguments_copy[0].type = std::move(array_type); - arguments_copy[0].name = arguments[0].name; - - return executeArrayImpl(arguments_copy, result_type); - } + argument.column = recursiveRemoveLowCardinality(argument.column); + argument.type = recursiveRemoveLowCardinality(argument.type); } - return executeArrayImpl(arguments, result_type); + return executeArrayImpl(new_arguments, result_type); } private: @@ -458,18 +452,6 @@ private: using NullMaps = std::pair; - struct ExecutionData - { - const IColumn& left; - const IColumn& right; - const ColumnArray::Offsets& offsets; - ColumnPtr result_column; - NullMaps maps; - ResultColumnPtr result { ResultColumnType::create() }; - - void moveResult() { result_column = std::move(result); } - }; - static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); @@ -574,23 +556,13 @@ private: } } -#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +#define INTEGRAL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { - if (const auto* const left_arr = checkAndGetColumn(arguments[0].column.get())) - { - if (checkAndGetColumn(&left_arr->getData())) - { - if (auto res = executeLowCardinality(arguments)) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); - } - } - ColumnPtr res; - if (!((res = executeIntegral(arguments)) + if (!((res = executeNothing(arguments)) + || (res = executeIntegral(arguments)) || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) @@ -599,6 +571,8 @@ private: return res; } +#undef INTEGRAL_PACK + /** * The Array's internal data type may be quite tricky (containing a Nullable type somewhere). To process the * Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one @@ -627,6 +601,14 @@ private: return {null_map_data, null_map_item}; } + struct ExecutionData + { + const IColumn & left; + const IColumn & right; + const ColumnArray::Offsets & offsets; + NullMaps null_maps; + }; + /** * Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters: * Integral s = {s1, s2, ...} @@ -635,39 +617,33 @@ private: template static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); - - if (!left) + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) return nullptr; - const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn& right = *right_converted_ptr.get(); - - ExecutionData data = { - left->getData(), - right, - left->getOffsets(), - nullptr, - getNullMaps(arguments) + ExecutionData data + { + .left = array->getData(), + .right = *arguments[1].column, + .offsets = array->getOffsets(), + .null_maps = getNullMaps(arguments), }; - if (executeIntegral(data)) - return data.result_column; - - return nullptr; + auto result = ResultColumnType::create(); + return executeIntegral(data, *result) ? 
std::move(result) : nullptr; } template - static bool executeIntegral(ExecutionData& data) + static bool executeIntegral(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralExpanded(data) || ...); + return (executeIntegralExpanded(data, result) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... template - static bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralImpl(data) || ...); + return (executeIntegralImpl(data, result) || ...); } /** @@ -676,40 +652,31 @@ private: * so we have to check all possible variants for #Initial and #Resulting types. */ template - static bool executeIntegralImpl(ExecutionData& data) + static bool executeIntegralImpl(const ExecutionData & data, ResultColumnType & result) { - const ColumnVector * col_nested = checkAndGetColumn>(&data.left); - - if (!col_nested) + const auto * left_typed = checkAndGetColumn>(&data.left); + if (!left_typed) return false; - const auto [null_map_data, null_map_item] = data.maps; - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto item_arg_const = checkAndGetColumnConst>(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConst>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_const->template getValue(), - data.result->getData(), - null_map_data, + result.getData(), + data.null_maps.first, nullptr); - else if (const auto item_arg_vector = checkAndGetColumn>(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_vector->getData(), - data.result->getData(), - null_map_data, - null_map_item); + result.getData(), + data.null_maps.first, + data.null_maps.second); else return false; - data.moveResult(); return true; } @@ -724,227 +691,161 @@ private: * * Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 . 
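     * A hedged summary of the rewritten fast path below: a constant needle is cast to the
     * dictionary value type and resolved once through the dictionary's getOrFindValueIndex();
     * the per-row work then scans only the small integer index column, and a dictionary miss
     * short-circuits to an all-zero "not found" result via resize_fill().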
*/ - static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeArrayLowCardinality(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array_const = checkAndGetColumnConstData(arguments[0].column.get()); - if (!col_array) + if (!col_array && !col_array_const) return nullptr; - const ColumnLowCardinality * const col_lc = checkAndGetColumn(&col_array->getData()); + if (col_array_const) + col_array = col_array_const; - if (!col_lc) + const auto * left_lc = checkAndGetColumn(&col_array->getData()); + if (!left_lc) return nullptr; - const auto [null_map_data, null_map_item] = getNullMaps(arguments); + const auto * right_const = checkAndGetColumn(arguments[1].column.get()); + if (!right_const) + return nullptr; - if (const ColumnConst * col_arg_const = checkAndGetColumn(&*arguments[1].column)) + const auto & array_type = assert_cast(*arguments[0].type); + const auto target_type = recursiveRemoveLowCardinality(array_type.getNestedType()); + auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); + + UInt64 index = 0; + UInt64 left_size = arguments[0].column->size(); + ResultColumnPtr col_result = ResultColumnType::create(); + + if (!right->isNullAt(0)) { - const IColumnUnique & col_lc_dict = col_lc->getDictionary(); + auto right_type = recursiveRemoveLowCardinality(arguments[1].type); + right = castColumn({right, right_type, ""}, target_type); - const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); - const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); + if (right->isNullable()) + right = checkAndGetColumn(*right).getNestedColumnPtr(); - ColumnPtr col_arg_cloned = castColumn( - {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr); + StringRef elem = right->getDataAt(0); + const auto & left_dict = left_lc->getDictionary(); - ResultColumnPtr col_result = ResultColumnType::create(); - UInt64 index = 0; - - if (!col_arg_cloned->isNullAt(0)) + if (std::optional maybe_index = left_dict.getOrFindValueIndex(elem); maybe_index) { - if (col_arg_cloned->isNullable()) - col_arg_cloned = checkAndGetColumn(*col_arg_cloned).getNestedColumnPtr(); - - StringRef elem = col_arg_cloned->getDataAt(0); - - if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) - { - index = *maybe_index; - } - else - { - const size_t offsets_size = col_array->getOffsets().size(); - auto & data = col_result->getData(); - - data.resize_fill(offsets_size); - - return col_result; - } + index = *maybe_index; } - - Impl::Main::vector( - col_lc->getIndexes(), - col_array->getOffsets(), - index, /** Assuming LowCardinality has index of NULL always as zero. 
*/ - col_result->getData(), - null_map_data, - null_map_item); - - return col_result; - } - else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U - { - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) - const ColumnNullable & left_nullable = checkAndGetColumn(*left_casted); - - const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - - const IColumn & left_ptr = left_nullable.getNestedColumn(); - - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); - - const NullMap * const null_map_right_casted = right_nullable - ? &right_nullable->getNullMapColumn().getData() - : null_map_item; - - const IColumn& right_ptr = right_nullable - ? right_nullable->getNestedColumn() - : *right_casted.get(); - - ExecutionData data = + else { - left_ptr, right_ptr, - col_array->getOffsets(), - nullptr, - {null_map_left_casted, null_map_right_casted}}; + col_result->getData().resize_fill(col_array->size()); - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; + } } - else // LowCardinality(T) and U, T not Nullable - { - if (arguments[1].column->isNullable()) - return nullptr; - - if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); - arg_lc && arg_lc->isNullable()) - return nullptr; - - // LowCardinality(T) and U (possibly LowCardinality(V)) - - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - - ExecutionData data = - { - *left_casted.get(), *right_casted.get(), col_array->getOffsets(), - nullptr, {null_map_data, null_map_item} - }; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; - } - - return nullptr; - } - - static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) - { - if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays - return executeIntegral(data); - - if (checkAndGetColumn(&data.left)) - return executeStringImpl(data); Impl::Main::vector( - data.left, - data.offsets, data.right, - data.result->getData(), - data.maps.first, data.maps.second); + left_lc->getIndexes(), + col_array->getOffsets(), + index, /** Assuming LowCardinality has index of NULL always as zero. 
*/ + col_result->getData(), + nullptr, + nullptr); - data.moveResult(); - return true; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; } -#undef INTEGRAL_TPL_PACK + ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + { + if constexpr (!std::is_same_v) + return nullptr; + + if (!isMap(arguments[0].type)) + return nullptr; + + auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); + + const auto & map_column = assert_cast(*non_const_map_column); + const auto & map_array_column = map_column.getNestedColumn(); + auto offsets = map_array_column.getOffsetsPtr(); + auto keys = map_column.getNestedData().getColumnPtr(0); + auto array_column = ColumnArray::create(keys, offsets); + + const auto & type_map = assert_cast(*arguments[0].type); + auto array_type = std::make_shared(type_map.getKeyType()); + + auto arguments_copy = arguments; + arguments_copy[0].column = std::move(array_column); + arguments_copy[0].type = std::move(array_type); + arguments_copy[0].name = arguments[0].name; + + return executeArrayImpl(arguments_copy, result_type); + } static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); - + const auto * array = checkAndGetColumn(arguments[0].column.get()); if (!array) return nullptr; - const ColumnString * left = checkAndGetColumn(&array->getData()); - + const auto * left = checkAndGetColumn(&array->getData()); if (!left) return nullptr; - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & right = *right_ptr.get(); + const auto & right = *arguments[1].column; + const auto [null_map_data, null_map_item] = getNullMaps(arguments); - ExecutionData data = { - *left, right, array->getOffsets(), - nullptr, getNullMaps(arguments), - std::move(ResultColumnType::create()) - }; + auto result = ResultColumnType::create(); - if (executeStringImpl(data)) - return data.result_column; - - return nullptr; - } - - static bool executeStringImpl(ExecutionData& data) - { - const auto [null_map_data, null_map_item] = data.maps; - const ColumnString& left = *typeid_cast(&data.left); - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto *const item_arg_const = checkAndGetColumnConstStringOrFixedString(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConstStringOrFixedString(&right)) { - const ColumnString * item_const_string = - checkAndGetColumn(&item_arg_const->getDataColumn()); - - const ColumnFixedString * item_const_fixedstring = - checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_string = checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_fixedstring = checkAndGetColumn(&item_arg_const->getDataColumn()); if (item_const_string) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_string->getChars(), item_const_string->getDataAt(0).size, - data.result->getData(), + result->getData(), null_map_data, null_map_item); else if (item_const_fixedstring) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_fixedstring->getChars(), item_const_fixedstring->getN(), - 
data.result->getData(), + result->getData(), null_map_data, null_map_item); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnConst contains not String nor FixedString column"); } - else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn(&right)) { Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); } else - return false; + { + return nullptr; + } - data.moveResult(); - return true; + return result; } static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) @@ -955,9 +856,7 @@ private: return nullptr; Array arr = col_array->getValue(); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn * item_arg = right_ptr.get(); + const IColumn * item_arg = arguments[1].column.get(); if (isColumnConst(*item_arg)) { @@ -1026,48 +925,59 @@ private: } } + static ColumnPtr executeNothing(const ColumnsWithTypeAndName & arguments) + { + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + return nullptr; + + if (arguments[1].column->onlyNull()) + { + auto result = ResultColumnType::create(); + Impl::Null::process(array->getOffsets(), result->getData(), getNullMaps(arguments).first); + return result; + } + + return nullptr; + } + static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * col = checkAndGetColumn(arguments[0].column.get()); - - if (!col) + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) return nullptr; DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); const DataTypePtr & index_type = arguments[1].type; - DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, index_type}); - - ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type); + DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, arguments[1].type}); + ColumnPtr col_nested = castColumn({ col_array->getDataPtr(), array_elements_type, "" }, common_type); + ColumnPtr item_arg = castColumn({ arguments[1].column, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg->onlyNull()) - Impl::Null::process( - col->getOffsets(), - col_res->getData(), - null_map_data); - else if (isColumnConst(*item_arg)) + if (const auto * item_arg_const = checkAndGetColumn(item_arg.get())) + { Impl::Main::vector( *col_nested, - col->getOffsets(), - typeid_cast(*item_arg).getDataColumn(), + col_array->getOffsets(), + item_arg_const->getDataColumn(), col_res->getData(), /// TODO This is wrong. 
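                /// (The TODO above is pre-existing context, not introduced here: for a constant
                /// needle the item null map is deliberately passed as nullptr below, so this
                /// refactoring preserves, rather than fixes, the old behaviour for const needles.)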
null_map_data, nullptr); + } else + { Impl::Main::vector( *col_nested, - col->getOffsets(), + col_array->getOffsets(), *item_arg, col_res->getData(), null_map_data, null_map_item); + } return col_res; } diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index c366a1ecb44..d561430d51f 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -5,6 +5,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -24,6 +25,8 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -37,9 +40,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if (static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8 * sizeof(*pos); + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.push_back(0); @@ -102,10 +108,12 @@ struct BitShiftLeftImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 1c37cd3bf4c..05b8581c792 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -6,6 +6,7 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } @@ -25,6 +26,8 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); + else if (b < 0 || static_cast(b) > 8 * sizeof(A)) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -53,9 +56,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; - /// To prevent overflow - if 
(static_cast(b) >= (static_cast(end - pos) * word_size) || b < 0) + const UInt8 word_size = 8; + size_t n = end - pos; + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { /// insert default value out_vec.push_back(0); @@ -90,10 +96,12 @@ struct BitShiftRightImpl throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { - UInt8 word_size = 8; + const UInt8 word_size = 8; size_t n = end - pos; - /// To prevent overflow - if (static_cast(b) >= (static_cast(n) * word_size) || b < 0) + const UInt128 bit_limit = static_cast(word_size) * n; + if (b < 0 || static_cast(b) > bit_limit) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b == bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp index 2a979f500f7..407977f1f13 100644 --- a/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp +++ b/src/IO/Archives/hasRegisteredArchiveFileExtension.cpp @@ -1,5 +1,7 @@ #include +#include +#include namespace DB { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 6386c7a3c76..128df415197 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -47,7 +47,7 @@ namespace size_t total_size_, const String & dest_container_for_logging_, const String & dest_blob_, - std::shared_ptr settings_, + std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, const Poco::Logger * log_) : create_read_buffer(create_read_buffer_) @@ -72,7 +72,7 @@ namespace size_t total_size; const String & dest_container_for_logging; const String & dest_blob; - std::shared_ptr settings; + std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; const Poco::Logger * log; size_t max_single_part_upload_size; @@ -318,7 +318,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr dest_client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; @@ -335,7 +335,7 @@ void copyAzureBlobStorageFile( size_t size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 9c20ee4cff0..c8e48fcd372 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -28,7 +28,7 @@ void copyAzureBlobStorageFile( size_t src_size, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, const ReadSettings & read_settings, 
ThreadPoolCallbackRunnerUnsafe schedule_ = {}); @@ -45,7 +45,7 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr client, const String & dest_container_for_logging, const String & dest_blob, - std::shared_ptr settings, + std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule_ = {}); } diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 5c9a69893df..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -4,12 +4,15 @@ #include #include +#include #include #include #include +#include "config.h" + namespace ProfileEvents { @@ -41,10 +44,13 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; + Memory() = default; /// If alignment != 0, then allocate memory aligned to specified value. - explicit Memory(size_t size_, size_t alignment_ = 0) : alignment(alignment_) + explicit Memory(size_t size_, size_t alignment_ = 0, bool allow_gwp_asan_force_sample_ = false) + : alignment(alignment_), allow_gwp_asan_force_sample(allow_gwp_asan_force_sample_) { alloc(size_); } @@ -127,6 +133,11 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, new_capacity); +#if USE_GWP_ASAN + if (unlikely(allow_gwp_asan_force_sample && GWPAsan::shouldForceSample())) + gwp_asan::getThreadLocals()->NextSampleCounter = 1; +#endif + m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = new_size; @@ -154,7 +165,7 @@ protected: public: /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) - : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) + : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment, /*allow_gwp_asan_force_sample_=*/true) { Base::set(existing_memory ? 
existing_memory : memory.data(), size); Base::padded = !existing_memory; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dd1166a9228..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter( - query, - query_context, - query_context->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_insert */ false); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -787,12 +781,7 @@ try try { interpreter = std::make_unique( - key.query, - insert_context, - key.settings.insert_allow_materialized_columns, - false, - false, - true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } @@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b946c2cb21e..d36b0e98f1e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2238,6 +2238,12 @@ void Context::setSetting(std::string_view name, const Field & value) contextSanityClampSettingsWithLock(*this, settings, lock); } +void Context::setServerSetting(std::string_view name, const Field & value) +{ + std::lock_guard lock(mutex); + shared->server_settings.set(name, value); +} + void Context::applySettingChange(const SettingChange & change) { try diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d3f152b7a67..e2f2523b30e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -823,6 +823,7 @@ public: /// Set settings by name. 
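    /// A minimal, hypothetical call for the new server-scope variant declared below
    /// (setting name and value are chosen purely for illustration):
    ///     context->setServerSetting("background_pool_size", Field(static_cast<UInt64>(16)));
    /// Unlike setSetting(), it writes into shared->server_settings under the global mutex.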
void setSetting(std::string_view name, const String & value); void setSetting(std::string_view name, const Field & value); + void setServerSetting(std::string_view name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 81bb6290acb..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -23,7 +22,6 @@ #include #include -#include #include #include #include @@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfoCloneable +class TableCheckTask : public ChunkInfo { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -112,12 +110,6 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } - TableCheckTask(const TableCheckTask & other) - : table(other.table) - , check_data_tasks(other.check_data_tasks) - , is_finished(other.is_finished.load()) - {} - std::optional checkNext() const { if (isFinished()) @@ -129,8 +121,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr tmp = check_data_tasks; - auto result = table->checkDataNext(tmp); + IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; + auto result = table->checkDataNext(check_data_tasks_); is_finished = !result.has_value(); return result; } @@ -188,7 +180,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.getChunkInfos().add(std::move(current_check_task)); + result.setChunkInfo(std::move(current_check_task)); return result; } @@ -288,7 +280,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = chunk.getChunkInfos().get(); + auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ee191c02ff8..0ee2bb6c0e9 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery( - insert, - getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false).execute(); + return InterpreterInsertQuery(insert, getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 26b7e074fdf..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert( - ast.getExplainedQuery(), - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* 
no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +38,6 @@ #include #include #include -#include "base/defines.h" namespace ProfileEvents @@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) -{ - chassert(presink_streams > 0); - chassert(sink_streams > 0); - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - - std::vector sink_chains; - std::vector presink_chains; - - for (size_t i = 0; i < sink_streams; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - - for (size_t i = 0; i < presink_streams; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } - - return {std::move(presink_chains), std::move(sink_chains)}; -} - - -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - ContextPtr select_context = getContext(); - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. 
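     * (For a concrete feel, assuming the default min_insert_block_size_rows of 1048576 and a
     * storage that prefers large blocks, max_block_size for the SELECT is raised to 1048576
     * below, so blocks arrive pre-sized and the squashing step mostly passes them through.)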
- */ - - Settings new_settings = select_context->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto context_for_trivial_select = Context::createCopy(context); - context_for_trivial_select->setSettings(new_settings); - context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - select_context = context_for_trivial_select; - } - - QueryPipelineBuilder pipeline; - - { - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - { - query_sample_block.setColumn( - col_idx, - ColumnWithTypeAndName( - makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), - makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), - query_columns[col_idx].name)); - } - } - } - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. 
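    /// (Assumed background, not stated in this change: Sparse serialization keeps only
    /// non-default values plus offsets, so materializing to a full column here lets the
    /// destination storage decide independently whether to re-apply Sparse on write.)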
- pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - size_t num_select_threads = pipeline.getNumThreads(); - - pipeline.resize(1); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - - size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - - if (!settings.parallel_view_processing) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; - } - - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - pipeline.resize(presink_chains.size()); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - for (auto & chain : presink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(presink_chains)); - - pipeline.resize(sink_streams_size); - - for (auto & chain : sink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) - { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. 
- if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); - } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); -} - - -QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - Chain chain; - - { - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - /* presink_streams */1, /* sink_streams */1, - table, metadata_snapshot, query_sample_block); - - chain = std::move(presink_chains.front()); - chain.appendChain(std::move(sink_chains.front())); - } - - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - - chain.addSource(std::make_shared(chain.getInputHeader())); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - QueryPipeline pipeline = QueryPipeline(std::move(chain)); - - pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); - pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - pipeline.complete(std::move(pipe)); - } - - return pipeline; -} - - BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + QueryPipelineBuilder pipeline; + std::optional distributed_pipeline; + QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (!allow_materialized) + if (query.select && settings.parallel_distributed_insert_select) + // Distributed INSERT SELECT + distributed_pipeline = table->distributedWrite(query, getContext()); + + std::vector presink_chains; + std::vector sink_chains; + if (!distributed_pipeline) { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + /// * If it's not an INSERT SELECT, forget all that and use one stream. 
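Reviewer note — to make the stream-sizing policy in the comment above concrete, here is a minimal, self-contained C++ sketch of the same decision logic. It is illustrative only: `StreamPolicyInputs`, `StreamSizes`, and `chooseStreams` are hypothetical names, not ClickHouse APIs, and the sketch assumes the policy exactly as the comment states it.

```cpp
// Illustrative sketch only (not ClickHouse code) of the stream-count policy
// described in the comment above. All names here are hypothetical.
#include <algorithm>
#include <cstddef>

struct StreamPolicyInputs
{
    size_t max_insert_threads = 0;      // user setting
    size_t select_streams = 1;          // what the SELECT pipeline ended up with
    bool   is_insert_select = true;
    bool   supports_parallel_insert = false;
};

struct StreamSizes { size_t presink = 1; size_t sink = 1; };

// Presink work (materializing, squashing) may run on many streams, but the
// write to IStorage collapses to one stream unless the storage supports
// parallel inserts -- the ResizeProcessor mentioned above does that collapse.
StreamSizes chooseStreams(const StreamPolicyInputs & in)
{
    StreamSizes sizes;
    if (!in.is_insert_select)
        return sizes; // plain INSERT: one stream for everything

    sizes.presink = std::max<size_t>(1, std::max(in.max_insert_threads, in.select_streams));
    sizes.sink = in.supports_parallel_insert ? sizes.presink : 1;
    return sizes;
}
```

With `supports_parallel_insert = false`, any number of presink streams still funnels into a single sink, which is the behavior the actual code below implements with `pipeline.resize(...)`.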
+ size_t pre_streams_size = 1; + size_t sink_streams_size = 1; + + if (query.select) + { + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + Settings new_settings = getContext()->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + else + { + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + if (settings.max_insert_threads > 1) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); + pre_streams_size = resize_to_max_insert_threads ? 
settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + + if (table->supportsParallelInsert()) + sink_streams_size = pre_streams_size; + } + + pipeline.resize(pre_streams_size); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + } + } + } + } + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + for (size_t i = 0; i < sink_streams_size; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); + } + for (size_t i = 0; i < pre_streams_size; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } } BlockIO res; - if (query.select) + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
+ if (distributed_pipeline) { - if (settings.parallel_distributed_insert_select) + res.pipeline = std::move(*distributed_pipeline); + } + else if (query.select) + { + const auto & header = presink_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - auto distributed = table->distributedWrite(query, getContext()); - if (distributed) - { - res.pipeline = std::move(*distributed); - } - else - { - res.pipeline = buildInsertSelectPipeline(query, table); - } - } - else + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - res.pipeline = buildInsertSelectPipeline(query, table); + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); } + + size_t num_select_threads = pipeline.getNumThreads(); + + for (auto & chain : presink_chains) + resources = chain.detachResources(); + for (auto & chain : sink_chains) + resources = chain.detachResources(); + + pipeline.addChains(std::move(presink_chains)); + pipeline.resize(sink_chains.size()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - res.pipeline = buildInsertPipeline(query, table); + auto & chain = presink_chains.at(0); + chain.appendChain(std::move(sink_chains.at(0))); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + res.pipeline = QueryPipeline(std::move(presink_chains[0])); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } } - res.pipeline.addStorageHolder(table); + res.pipeline.addResources(std::move(resources)); - if (const auto * mv = dynamic_cast(table.get())) - res.pipeline.addStorageHolder(mv->getTargetTable()); + res.pipeline.addStorageHolder(table); + if (inner_table) + res.pipeline.addStorageHolder(inner_table); return res; } @@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } - void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } - void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, - /* no_destination */false, - /* async_insert */false); + return std::make_unique(args.query, args.context, args.allow_materialized); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } - - } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 
894c7c42144..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_, - bool no_squash_, - bool no_destination, - bool async_insert_); + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,17 +73,12 @@ private: ASTPtr query_ptr; const bool allow_materialized; - bool no_squash = false; - bool no_destination = false; + const bool no_squash; + const bool no_destination; const bool async_insert; std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - - QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); - QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); - Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -12,33 +11,24 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(header_) { } Chunk Squashing::flush() { - if (!accumulated) - return {}; - - auto result = convertToChunk(accumulated.extract()); - chassert(result); - return result; + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk.hasChunkInfo()) return Chunk(); - auto squash_info = input_chunk.getChunkInfos().extract(); - - if (!squash_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk)) + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) { /// If no accumulated data, return just read block. - if (!accumulated) + if (chunks_to_merge_vec.empty()) { - accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. 
- Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize()) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - accumulated.add(std::move(input_chunk)); + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize()) - return convertToChunk(accumulated.extract()); - + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } return {}; } @@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); - aggr_chunk.getChunkInfos().add(std::move(info)); - chassert(aggr_chunk); - return aggr_chunk; + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); } -Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) +Chunk Squashing::squash(std::vector & input_chunks) { + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} - Chunk result; - result.setColumns(std::move(mutable_columns), rows); - result.setChunkInfos(infos); - result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); - chassert(result); - return result; + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= 
min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -bool Squashing::isEnoughSize() const -{ - return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); -}; - -bool Squashing::isEnoughSize(const Chunk & chunk) const -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - -void Squashing::CurrentSize::add(Chunk && chunk) -{ - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - chunks.push_back(std::move(chunk)); -} - -std::vector Squashing::CurrentSize::extract() -{ - auto result = std::move(chunks); - *this = {}; - return result; -} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,18 +8,9 @@ namespace DB { -class ChunksToSquash : public ChunkInfoCloneable +struct ChunksToSquash : public ChunkInfo { -public: - ChunksToSquash() = default; - ChunksToSquash(const ChunksToSquash & other) - { - chunks.reserve(other.chunks.size()); - for (const auto & chunk: other.chunks) - chunks.push_back(chunk.clone()); - } - - std::vector chunks = {}; + mutable std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -45,35 +36,32 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - void setHeader(Block header_) { header = std::move(header_); } - const Block & getHeader() const { return header; } - -private: - class CurrentSize + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize { - std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - - public: - explicit operator bool () const { return !chunks.empty(); } - size_t getRows() const { return rows; } - size_t getBytes() const { return bytes; } - void add(Chunk && chunk); - std::vector extract(); }; - const size_t min_block_size_rows; - const size_t min_block_size_bytes; - Block header; + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; - CurrentSize accumulated; + CurrentSize accumulated_size; - static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - bool isEnoughSize() const; + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index f386e157b14..7d84efba1b5 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -356,10 +357,15 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf if (blob_storage_log) logs.emplace_back(blob_storage_log.get()); + bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { for (auto & log : logs) + { log->startup(); + if (should_prepare) + log->prepareTable(); + } } catch (...) 
{ @@ -538,13 +544,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter( - query_ptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 94cb8c3e2fd..0ac468b15ec 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -135,6 +135,12 @@ public: void stopFlushThread() override; + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. + */ + void prepareTable() override; + protected: LoggerPtr log; @@ -145,12 +151,6 @@ protected: StoragePtr getStorage() const; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - /// Some tables can override settings for internal queries virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ce6f5e454e..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolumns in unknown required columns. + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Interpreters/getColumnFromBlock.cpp b/src/Interpreters/getColumnFromBlock.cpp index 972e109afb3..2e70a58b5a1 100644 --- a/src/Interpreters/getColumnFromBlock.cpp +++ b/src/Interpreters/getColumnFromBlock.cpp @@ -31,6 +31,36 @@ ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & req return castColumn({elem_column, elem_type, ""}, requested_column.type); } +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn) +{ + const auto * elem = block.findByName(requested_subcolumn.getNameInStorage()); + if (!elem) + return nullptr; + + auto subcolumn_name = requested_subcolumn.getSubcolumnName(); + /// If requested subcolumn is dynamic, we should first perform cast and then + /// extract the subcolumn, because the data of dynamic subcolumn can change after cast. 
+ if (elem->type->hasDynamicSubcolumns() && !elem->type->equals(*requested_column_type)) + { + auto casted_column = castColumn({elem->column, elem->type, ""}, requested_column_type); + auto elem_column = requested_column_type->tryGetSubcolumn(subcolumn_name, casted_column); + auto elem_type = requested_column_type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return elem_column; + } + + auto elem_column = elem->type->tryGetSubcolumn(subcolumn_name, elem->column); + auto elem_type = elem->type->tryGetSubcolumnType(subcolumn_name); + + if (!elem_type || !elem_column) + return nullptr; + + return castColumn({elem_column, elem_type, ""}, requested_subcolumn.type); +} + ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column) { auto result_column = tryGetColumnFromBlock(block, requested_column); diff --git a/src/Interpreters/getColumnFromBlock.h b/src/Interpreters/getColumnFromBlock.h index 26500cfdd17..737ce9db555 100644 --- a/src/Interpreters/getColumnFromBlock.h +++ b/src/Interpreters/getColumnFromBlock.h @@ -9,5 +9,6 @@ namespace DB ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); ColumnPtr tryGetColumnFromBlock(const Block & block, const NameAndTypePair & requested_column); +ColumnPtr tryGetSubcolumnFromBlock(const Block & block, const DataTypePtr & requested_column_type, const NameAndTypePair & requested_subcolumn); } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 7ca27cf6584..88bb6b9ce76 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5997452bcf3..d4fc9a4bc4d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, if (hex) { - hexStringDecode(str_begin, str_end, res_pos); + hexStringDecode(str_begin, str_end, res_pos, word_size); } else { - binStringDecode(str_begin, str_end, res_pos); + binStringDecode(str_begin, str_end, res_pos, word_size); } return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4466be5b3a7..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + Chunk Chunk::clone() const { - auto tmp = Chunk(getColumns(), getNumRows()); - tmp.setChunkInfos(chunk_infos.clone()); - return tmp; + return Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns 
columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1348966c0d3..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,9 +1,7 @@ #pragma once -#include #include - -#include +#include namespace DB { @@ -11,29 +9,11 @@ namespace DB class ChunkInfo { public: - using Ptr = std::shared_ptr; - - ChunkInfo() = default; - ChunkInfo(const ChunkInfo&) = default; - ChunkInfo(ChunkInfo&&) = default; - - virtual Ptr clone() const = 0; virtual ~ChunkInfo() = default; + ChunkInfo() = default; }; - -template -class ChunkInfoCloneable : public ChunkInfo -{ -public: - ChunkInfoCloneable() = default; - ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; - - Ptr clone() const override - { - return std::static_pointer_cast(std::make_shared(*static_cast(this))); - } -}; +using ChunkInfoPtr = std::shared_ptr; /** * Chunk is a list of columns with the same length. @@ -52,26 +32,26 @@ public: class Chunk { public: - using ChunkInfoCollection = CollectionOfDerivedItems; - Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_infos(std::move(other.chunk_infos)) + , chunk_info(std::move(other.chunk_info)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_infos = std::move(other.chunk_infos); + chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -82,15 +62,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); + chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_infos.clear(); + chunk_info.reset(); } const Columns & getColumns() const { return columns; } @@ -101,9 +81,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - ChunkInfoCollection & getChunkInfos() { return chunk_infos; } - const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } - void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } + const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } + bool hasChunkInfo() const { return chunk_info != nullptr; } + void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -127,7 +107,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoCollection chunk_infos; + ChunkInfoPtr chunk_info; void checkNumRowsIsConsistent(); }; @@ -137,15 +117,11 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. 
-class AsyncInsertInfo : public ChunkInfoCloneable +class AsyncInsertInfo : public ChunkInfo { public: AsyncInsertInfo() = default; - AsyncInsertInfo(const AsyncInsertInfo & other) = default; - AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) - : offsets(offsets_) - , tokens(tokens_) - {} + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} std::vector offsets; std::vector tokens; @@ -154,11 +130,9 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. -class ChunkMissingValues : public ChunkInfoCloneable +class ChunkMissingValues : public ChunkInfo { public: - ChunkMissingValues(const ChunkMissingValues & other) = default; - using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d9fab88fe1f..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 25c15d40c9a..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9e499e2c400..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count); - piece.back().setChunkInfos(concatenated.getChunkInfos()); - + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 46be6e74693..4136fc5a5f2 100644 --- 
a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,9 +8,8 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}) - , input(inputs.front()) - , output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - if (input.chunk.getChunkInfos().empty()) + const auto & info = input.chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - if (auto arenas_info = input.chunk.getChunkInfos().get()) + /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. + if (const auto * arenas_info = typeid_cast(info.get())) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().getChunkInfos().add(std::make_shared()); + chunks.back().setChunkInfo(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. 
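Reviewer note — a recurring pattern in the hunks above and below is the return to a single `ChunkInfoPtr` per chunk, inspected by downcasting, in place of the `ChunkInfoCollection` API. The following self-contained sketch shows that restored control flow under simplified stand-in types; it uses plain `dynamic_cast` where ClickHouse uses `typeid_cast`, and assumes exactly one info object rides on a chunk.

```cpp
// Self-contained sketch of the single-ChunkInfoPtr pattern this revert
// restores; the structs are simplified stand-ins for the real classes.
#include <memory>
#include <stdexcept>

struct ChunkInfo { virtual ~ChunkInfo() = default; };
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;

struct AggregatedChunkInfo : ChunkInfo { int bucket_num = -1; bool is_overflows = false; };

struct Chunk
{
    ChunkInfoPtr chunk_info;
    bool hasChunkInfo() const { return chunk_info != nullptr; }
    const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
    void setChunkInfo(ChunkInfoPtr info) { chunk_info = std::move(info); }
};

// Consumers must test-and-cast for the one info type they expect, mirroring
// the error handling in AggregatingTransform::getInfoFromChunk above.
int bucketOf(const Chunk & chunk)
{
    const auto & info = chunk.getChunkInfo();
    if (!info)
        throw std::logic_error("Chunk info was not set for chunk.");
    if (const auto * agg = dynamic_cast<const AggregatedChunkInfo *>(info.get()))
        return agg->bucket_num;
    throw std::logic_error("Chunk should have AggregatedChunkInfo.");
}
```

One consequence, visible in the `PullingPipelineExecutor` hunk above, is that every consumer interested in a different info type must perform this cast itself; the collection API this patch removes let several infos coexist on one chunk.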
diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index e4f22deec8d..bcf4e759024 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,22 +6,18 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfoCloneable +class MergeTreePartLevelInfo : public ChunkInfo { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) - : origin_merge_tree_part_level(part_level) - { } - MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; - + explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto part_level_info = chunk.getChunkInfos().get(); - if (part_level_info) + const auto & info = chunk.getChunkInfo(); + if (const auto * part_level_info = typeid_cast(info.get())) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index cd347d371d9..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2f23f2a5c4d..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace Poco { @@ -15,13 +14,11 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfoCloneable +struct ChunkSelectFinalIndices : public ChunkInfo { - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); - ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; - const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index b1b0182a113..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. 
- if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) + if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) + if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b1ab5561958..c48bdf1552a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -46,6 +46,10 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Replace chain `FilterStep -> FilterStep` to single FilterStep +/// Note: this breaks short-circuit logic, so it is disabled for now. +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); @@ -81,11 +85,12 @@ size_t tryAggregatePartitionsIndependently(QueryPlan::Node * node, QueryPlan::No inline const auto & getOptimizations() { - static const std::array optimizations = {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::lift_up_array_join}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::push_down_limit}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::split_filter}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::merge_expressions}, + {tryMergeFilters, "mergeFilters", &QueryPlanOptimizationSettings::merge_filters}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryConvertOuterJoinToInnerJoin, "convertOuterJoinToInnerJoin", &QueryPlanOptimizationSettings::convert_outer_join_to_inner_join}, {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::execute_functions_after_sorting}, diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 2738de1ff5f..4d984133efd 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -20,6 +20,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.merge_expressions = from.query_plan_enable_optimizations && from.query_plan_merge_expressions; + settings.merge_filters = from.query_plan_enable_optimizations && from.query_plan_merge_filters; + settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; 
settings.convert_outer_join_to_inner_join = from.query_plan_enable_optimizations && from.query_plan_convert_outer_join_to_inner_join; diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 85042cea4ed..539ff2eafbb 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -31,6 +31,9 @@ struct QueryPlanOptimizationSettings /// If merge-expressions optimization is enabled. bool merge_expressions = true; + /// If merge-filters optimization is enabled. + bool merge_filters = false; + /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..118abdd701f 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -34,7 +34,6 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); - auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -76,7 +75,23 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } - else if (parent_filter && child_filter) + + return 0; +} +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_filter = typeid_cast(parent.get()); + auto * child_filter = typeid_cast(child.get()); + + if (parent_filter && child_filter) { const auto & child_actions = child_filter->getExpression(); const auto & parent_actions = parent_filter->getExpression(); diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp index 2e7693b1b36..6dc0c021a14 100644 --- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp @@ -30,12 +30,15 @@ public: std::shared_ptr> parallel_execution_index_, InitializerFunc initializer_func_ = {}) : ISource(storage_snapshot->getSampleBlockForColumns(column_names_)) - , column_names_and_types(storage_snapshot->getColumnsByNames( + , requested_column_names_and_types(storage_snapshot->getColumnsByNames( GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects(), column_names_)) , data(data_) , parallel_execution_index(parallel_execution_index_) , initializer_func(std::move(initializer_func_)) { + auto all_column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withExtendedObjects()); + for (const auto & [name, type] : all_column_names_and_types) + all_names_to_types[name] = type; } String getName() const override { return "Memory"; } @@ -59,17 +62,20 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - size_t num_columns = column_names_and_types.size(); + size_t num_columns = requested_column_names_and_types.size(); columns.reserve(num_columns); - auto name_and_type = 
column_names_and_types.begin(); + auto name_and_type = requested_column_names_and_types.begin(); for (size_t i = 0; i < num_columns; ++i) { - columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); + if (name_and_type->isSubcolumn()) + columns.emplace_back(tryGetSubcolumnFromBlock(src, all_names_to_types[name_and_type->getNameInStorage()], *name_and_type)); + else + columns.emplace_back(tryGetColumnFromBlock(src, *name_and_type)); ++name_and_type; } - fillMissingColumns(columns, src.rows(), column_names_and_types, column_names_and_types, {}, nullptr); + fillMissingColumns(columns, src.rows(), requested_column_names_and_types, requested_column_names_and_types, {}, nullptr); assert(std::all_of(columns.begin(), columns.end(), [](const auto & column) { return column != nullptr; })); return Chunk(std::move(columns), src.rows()); @@ -88,7 +94,9 @@ private: } } - const NamesAndTypesList column_names_and_types; + const NamesAndTypesList requested_column_names_and_types; + /// Map (name -> type) for all columns from the storage header. + std::unordered_map all_names_to_types; size_t execution_index = 0; std::shared_ptr data; std::shared_ptr> parallel_execution_index; diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index c05cc1defcb..30cf958c072 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk); - cur_chunk = std::move(chunk); + consume(chunk.clone()); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,7 +18,8 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk & chunk) = 0; + virtual void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } private: std::vector table_locks; @@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk &) override {} + void consume(Chunk) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index 7ac460c14e2..ec0dc9609f1 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,10 +43,7 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - auto chunk = Chunk(res.getColumns(), res.rows()); - chunk.getChunkInfos().add(std::move(info)); 
- - return chunk; + return Chunk(res.getColumns(), res.rows(), std::move(info)); } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 1578bd389c9..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 9abe0504d10..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,9 +5,7 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) -{ -} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..9ffe15d0f85 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. 
- to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); + to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - { finalizeChunk(chunk, aggregates_mask); - } - else if (!chunk.getChunkInfos().get()) + else if (!chunk.getChunkInfo()) { - chunk.getChunkInfos().add(std::make_shared()); + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..5d50e97f552 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { @@ -13,12 +12,10 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable +struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} - Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 517f035667f..cdbe194cfac 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -44,11 +44,15 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info.get(); + return agg_info; } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -206,7 +210,11 @@ private: void process(Chunk && chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); + + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); @@ -775,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. 
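
The hunks above all make the same mechanical change: the chunk-info collection API (chunk.getChunkInfos().add()/get()) is replaced by the single-slot API (setChunkInfo()/getChunkInfo()), with an explicit null check and type check at every consumer. Below is a minimal, self-contained sketch of that retrieval pattern. The types here (ChunkInfo, AggregatedChunkInfo, Chunk) are simplified stand-ins for illustration only, and plain dynamic_cast stands in for ClickHouse's typeid_cast.

    // Sketch of the single-slot chunk-info pattern this patch standardizes on.
    #include <cassert>
    #include <memory>
    #include <stdexcept>

    struct ChunkInfo
    {
        virtual ~ChunkInfo() = default;
    };

    using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;

    struct AggregatedChunkInfo : ChunkInfo
    {
        int bucket_num = -1;
        bool is_overflows = false;
    };

    struct Chunk
    {
        ChunkInfoPtr info;
        void setChunkInfo(ChunkInfoPtr info_) { info = std::move(info_); }
        const ChunkInfoPtr & getChunkInfo() const { return info; }
    };

    // Mirrors getInfoFromChunk() above: fail loudly if the info is missing
    // or has the wrong concrete type.
    const AggregatedChunkInfo & getAggInfo(const Chunk & chunk)
    {
        const auto & info = chunk.getChunkInfo();
        if (!info)
            throw std::logic_error("Chunk info was not set for chunk.");

        const auto * agg_info = dynamic_cast<const AggregatedChunkInfo *>(info.get());
        if (!agg_info)
            throw std::logic_error("Chunk should have AggregatedChunkInfo.");

        return *agg_info;
    }

    int main()
    {
        Chunk chunk;
        auto info = std::make_shared<AggregatedChunkInfo>();
        info->bucket_num = 3;
        chunk.setChunkInfo(std::move(info));
        assert(getAggInfo(chunk).bucket_num == 3);
    }
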
diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..e167acde067 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfoCloneable +class AggregatedChunkInfo : public ChunkInfo { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 49a6581e685..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,12 +27,18 @@ public: } ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } protected: void onConsume(Chunk chunk) override { - cur_chunk = Squashing::squash(std::move(chunk)); + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -42,10 +48,16 @@ protected: res.is_done = true; return res; } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } private: Squashing squashing; Chunk cur_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 2c6b3bd8638..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,7 +1,6 @@ -#include -#include #include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp deleted file mode 100644 index 6786f76cbef..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include - -#include - -#include -#include -#include - - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void RestoreChunkInfosTransform::transform(Chunk & chunk) -{ - chunk.getChunkInfos().append(chunk_infos.clone()); -} - -namespace DeduplicationToken -{ - -String TokenInfo::getToken() const -{ - if (!isDefined()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); - - return getTokenImpl(); -} - -String TokenInfo::getTokenImpl() const -{ - String result; - result.reserve(getTotalSize()); - - for (const auto & part : parts) - { - if (!result.empty()) - result.append(":"); - result.append(part); - } - - return result; -} - -String TokenInfo::debugToken() const -{ - return getTokenImpl(); -} - -void TokenInfo::addChunkHash(String part) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(std::move(part)); -} - -void TokenInfo::finishChunkHashes() -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - stage = DEFINED; 
-} - -void TokenInfo::setUserToken(const String & token) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_USER_TOKEN; - - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("user-token-{}", token)); -} - -void TokenInfo::setSourceWithUserToken(size_t block_number) -{ - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("source-number-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::setViewID(const String & id) -{ - if (stage == DEFINED) - stage = DEFINE_VIEW; - - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-id-{}", id)); -} - -void TokenInfo::setViewBlockNumber(size_t block_number) -{ - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-block-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::reset() -{ - stage = UNDEFINED; - parts.clear(); -} - -void TokenInfo::addTokenPart(String part) -{ - parts.push_back(std::move(part)); -} - -size_t TokenInfo::getTotalSize() const -{ - if (parts.empty()) - return 0; - - size_t size = 0; - for (const auto & part : parts) - size += part.size(); - - // we reserve more size here to be able to add delimenter between parts. - return size + parts.size() - 1; -} - -#ifdef ABORT_ON_LOGICAL_ERROR -void CheckTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); -} -#endif - -String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) -{ - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); -} - - -void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); - - if (token_info->isDefined()) - return; - - token_info->addChunkHash(getChunkHash(chunk)); - token_info->finishChunkHashes(); -} - -void SetUserTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); - token_info->setUserToken(user_token); -} - -void SetSourceBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceWithUserToken(block_number++); -} - -void SetViewIDTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in 
SetViewIDTransform"); - token_info->setViewID(view_id); -} - -void SetViewBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); -} - -void ResetTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - - token_info->reset(); -} - -} -} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h deleted file mode 100644 index d6aff9e1370..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -#include -#include "Common/Logger.h" - - -namespace DB -{ - class RestoreChunkInfosTransform : public ISimpleTransform - { - public: - RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , chunk_infos(std::move(chunk_infos_)) - {} - - String getName() const override { return "RestoreChunkInfosTransform"; } - - void transform(Chunk & chunk) override; - - private: - Chunk::ChunkInfoCollection chunk_infos; - }; - - -namespace DeduplicationToken -{ - class TokenInfo : public ChunkInfoCloneable - { - public: - TokenInfo() = default; - TokenInfo(const TokenInfo & other) = default; - - String getToken() const; - String debugToken() const; - - bool empty() const { return parts.empty(); } - - bool isDefined() const { return stage == DEFINED; } - - void addChunkHash(String part); - void finishChunkHashes(); - - void setUserToken(const String & token); - void setSourceWithUserToken(size_t block_number); - - void setViewID(const String & id); - void setViewBlockNumber(size_t block_number); - - void reset(); - - private: - String getTokenImpl() const; - - void addTokenPart(String part); - size_t getTotalSize() const; - - /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the following order. - * Firstly token is expanded with information about the source. - * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken - * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken - * - * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. 
- * - * transition // method - * DEFINED -> DEFINE_VIEW // setViewID - * DEFINE_VIEW -> DEFINED // defineViewID - */ - - enum BuildingStage - { - UNDEFINED, - DEFINE_SOURCE_WITH_HASHES, - DEFINE_SOURCE_USER_TOKEN, - DEFINE_VIEW, - DEFINED, - }; - - BuildingStage stage = UNDEFINED; - std::vector parts; - }; - - -#ifdef ABORT_ON_LOGICAL_ERROR - /// use that class only with debug builds in CI for introspection - class CheckTokenTransform : public ISimpleTransform - { - public: - CheckTokenTransform(String debug_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(std::move(debug_)) - { - } - - String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String debug; - LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - }; -#endif - - - class AddTokenInfoTransform : public ISimpleTransform - { - public: - explicit AddTokenInfoTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared()); - } - }; - - - class DefineSourceWithChunkHashTransform : public ISimpleTransform - { - public: - explicit DefineSourceWithChunkHashTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. - // But if there is some table with different engine, we still need to define the source of the data in deduplication token - // We use that transform to define the source as a hash of entire block in deduplication token - void transform(Chunk & chunk) override; - - static String getChunkHash(const Chunk & chunk); - }; - - class ResetTokenTransform : public ISimpleTransform - { - public: - explicit ResetTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } - - void transform(Chunk & chunk) override; - }; - - - class SetUserTokenTransform : public ISimpleTransform - { - public: - SetUserTokenTransform(String user_token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , user_token(std::move(user_token_)) - { - } - - String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String user_token; - }; - - - class SetSourceBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetSourceBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - - - class SetViewIDTransform : public ISimpleTransform - { - public: - SetViewIDTransform(String view_id_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , view_id(std::move(view_id_)) - { - } - - String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } - - void transform(Chunk & chunk) override; - - private: - String view_id; - }; - - - class SetViewBlockNumberTransform : 
public ISimpleTransform - { - public: - explicit SetViewBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - -} -} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 04fabc9a3c6..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,7 +1,5 @@ #include #include - - namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - task = data.chunk.getChunkInfos().get(); - if (!task) + if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.getChunkInfos().add(std::make_shared()); + chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.getChunkInfos().add(std::move(task)); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5f6d9d6fff2..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,7 +1,6 @@ #pragma once #include -#include -#include + namespace DB { @@ -112,12 +111,11 @@ private: }; -class DelayedBlocksTask : public ChunkInfoCloneable +class DelayedBlocksTask : public ChunkInfo { public: DelayedBlocksTask() = default; - DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 9ae80e21a68..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,7 +1,6 @@ #include #include - namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d7bc320173b..607087fb39c 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,7 +150,11 @@ private: if (!chunk.hasRows()) return; - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in 
SortingAggregatedForMemoryBoundMergingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index ea9ebb0f96e..fc40c6894bb 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_shared(std::move(chunks)); + info->chunks = std::make_unique(std::move(chunks)); Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); output.push(std::move(chunk)); } @@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - - auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); - chunk.getChunkInfos().add(std::move(chunk_info)); - - chunks_map[bucket].emplace_back(std::move(chunk)); + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } @@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); + if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - if (cur_chunk.getChunkInfos().empty()) + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (auto agg_info = cur_chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (cur_chunk.getChunkInfos().get()) + else if 
(typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.getChunkInfos().add(std::move(res_info)); + chunk.setChunkInfo(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 3a3c1bd9c1e..77ee3034ffc 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -143,9 +142,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfoCloneable +struct ChunksToMerge : public ChunkInfo { - std::shared_ptr chunks; + std::unique_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..ad723da7527 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. 
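
The consume()/addChunk() hunks above share one dispatch idiom: inspect the concrete ChunkInfo type to decide whether a chunk belongs to a two-level aggregation bucket (AggregatedChunkInfo with a bucket number) or to the single-level stream. A minimal sketch of that routing follows; the types are simplified stand-ins, not the real ClickHouse classes, and dynamic_cast again stands in for typeid_cast.

    // Sketch of the two-level vs. single-level routing done in
    // GroupingAggregatedTransform::addChunk() and MergingAggregatedTransform::consume().
    #include <map>
    #include <memory>
    #include <stdexcept>
    #include <vector>

    struct ChunkInfo { virtual ~ChunkInfo() = default; };
    struct AggregatedChunkInfo : ChunkInfo { int bucket_num = -1; };
    struct ChunkInfoWithAllocatedBytes : ChunkInfo { long allocated_bytes = 0; };

    struct Chunk
    {
        std::shared_ptr<const ChunkInfo> info;
        size_t rows = 0;  // payload omitted in this sketch
    };

    struct GroupingState
    {
        std::map<int, std::vector<Chunk>> chunks_map;  // bucket number -> two-level chunks
        std::vector<Chunk> single_level_chunks;

        void addChunk(Chunk chunk)
        {
            if (!chunk.info)
                throw std::logic_error("Chunk info was not set for chunk.");

            if (const auto * agg = dynamic_cast<const AggregatedChunkInfo *>(chunk.info.get()))
                chunks_map[agg->bucket_num].push_back(std::move(chunk));
            else if (dynamic_cast<const ChunkInfoWithAllocatedBytes *>(chunk.info.get()))
                single_level_chunks.push_back(std::move(chunk));
            else
                throw std::logic_error("Unexpected chunk info type.");
        }
    };

    int main()
    {
        GroupingState state;
        Chunk two_level;
        two_level.info = std::make_shared<AggregatedChunkInfo>();
        state.addChunk(std::move(two_level));
    }
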
@@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index ee4dfa6a64e..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header_, header_) - , squashing(header_, min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk) + if (!squashed_chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return bool(squashed_chunk); + return squashed_chunk.hasChunkInfo(); } Chunk PlanSquashingTransform::getRemaining() { - return squashing.flush(); + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index e6db245499e..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,6 +23,7 @@ protected: private: Squashing squashing; Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index b44f5a3203e..480ab1a0f61 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - auto select_final_indices_info = chunk.getChunkInfos().extract(); + const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,6 +41,7 
@@ public: chunk.setColumns(std::move(columns), index_column->size()); } + chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1fb4433240a..b5a40c75c5b 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } SquashingTransform::SquashingTransform( @@ -18,7 +19,9 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +34,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = Squashing::squash(squashing.flush()); + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -44,7 +50,6 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); - if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -52,49 +57,170 @@ void SquashingTransform::work() } } -SimpleSquashingTransform::SimpleSquashingTransform( +SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - chunk = Squashing::squash(squashing.add(std::move(chunk))); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - chunk = Squashing::squash(squashing.flush()); - } + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + return std::move(squashed_chunk); +} + +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} + +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); +} + +SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block SquashingLegacy::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Block SquashingLegacy::add(const Block & input_block) +{ + return addImpl(input_block); 
+} + +/* + * To minimize copying, accept two types of argument: const reference for output + * stream, and rvalue reference for input stream, and decide whether to copy + * inside this function. This allows us not to copy Block unless we absolutely + * have to. + */ +template +Block SquashingLegacy::addImpl(ReferenceType input_block) +{ + /// End of input stream. + if (!input_block) { - if (output.isFinished()) - return Status::Finished; + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } - if (!output.canPush()) - return Status::PortFull; - - if (has_output) + /// Just read block is already enough. + if (isEnoughSize(input_block)) + { + /// If no accumulated data, return just read block. + if (!accumulated_block) { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; + return std::move(input_block); } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; } - return ISimpleTransform::prepare(); + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_block)) + { + /// Return accumulated data and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + append(std::move(input_block)); + if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Squashed block is not ready. + return {}; } + + +template +void SquashingLegacy::append(ReferenceType input_block) +{ + if (!accumulated_block) + { + accumulated_block = std::move(input_block); + return; + } + + assert(blocksHaveEqualStructure(input_block, accumulated_block)); + + try + { + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getByPosition(i).column; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } + } + catch (...) + { + /// add() may be called again even after a previous add() threw an exception. + /// Keep accumulated_block in a valid state. + /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will + /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". 
+        accumulated_block.clear();
+        throw;
+    }
+}
+
+
+bool SquashingLegacy::isEnoughSize(const Block & block)
+{
+    size_t rows = 0;
+    size_t bytes = 0;
+
+    for (const auto & [column, type, name] : block)
+    {
+        if (!column)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block.");
+
+        if (!rows)
+            rows = column->size();
+        else if (rows != column->size())
+            throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match");
+
+        bytes += column->byteSize();
+    }
+
+    return isEnoughSize(rows, bytes);
+}
+
+
+bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const
+{
+    return (!min_block_size_rows && !min_block_size_bytes)
+        || (min_block_size_rows && rows >= min_block_size_rows)
+        || (min_block_size_bytes && bytes >= min_block_size_bytes);
+}
+
+
 }
diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h
index c5b727ac6ec..452317e7d5e 100644
--- a/src/Processors/Transforms/SquashingTransform.h
+++ b/src/Processors/Transforms/SquashingTransform.h
@@ -2,6 +2,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -29,22 +30,51 @@ private:
     Chunk finish_chunk;
 };

-/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port.
-class SimpleSquashingTransform : public ISimpleTransform
+
+class SquashingLegacy
 {
 public:
-    explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes);
+    /// Conditions on rows and bytes are OR-ed. If one of them is zero, then the corresponding condition is ignored.
+    SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_);
+
+    /** Adds the next block and possibly returns a squashed block.
+      * At the end, pass an empty block; as the result for the last (empty) block, you will get the remaining accumulated data.
+      */
+    Block add(Block && block);
+    Block add(const Block & block);
+
+private:
+    size_t min_block_size_rows;
+    size_t min_block_size_bytes;
+
+    Block accumulated_block;
+
+    template <typename ReferenceType>
+    Block addImpl(ReferenceType block);
+
+    template <typename ReferenceType>
+    void append(ReferenceType block);
+
+    bool isEnoughSize(const Block & block);
+    bool isEnoughSize(size_t rows, size_t bytes) const;
+};
+
+class SimpleSquashingChunksTransform : public IInflatingTransform
+{
+public:
+    explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes);

     String getName() const override { return "SimpleSquashingTransform"; }

 protected:
-    void transform(Chunk &) override;
-
-    IProcessor::Status prepare() override;
+    void consume(Chunk chunk) override;
+    bool canGenerate() override;
+    Chunk generate() override;
+    Chunk getRemaining() override;

 private:
-    Squashing squashing;
-
-    bool finished = false;
+    SquashingLegacy squashing;
+    Chunk squashed_chunk;
 };
+
 }
diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp
index 59fceccb538..aa86879e62c 100644
--- a/src/Processors/Transforms/TotalsHavingTransform.cpp
+++ b/src/Processors/Transforms/TotalsHavingTransform.cpp
@@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk)
         /// Block with values not included in `max_rows_to_group_by`. We'll postpone it.
         if (overflow_row)
         {
-            const auto & agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
+            const auto & info = chunk.getChunkInfo();
+            if (!info)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform.");
+
+            const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
             if (!agg_info)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform.");
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index b9f61d30182..a694fa43e46 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -17,6 +17,9 @@
 #include
 #include
+#include
+#include
+
 #include
@@ -71,6 +74,9 @@ public:
         size_t function_index) const = 0;

     virtual std::optional<WindowFrame> getDefaultFrame() const { return {}; }
+
+    /// Is the frame type supported by this function.
+    virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; }
 };

 // Compares ORDER BY column values at given rows to find the boundaries of frame:
@@ -402,6 +408,19 @@ WindowTransform::WindowTransform(const Block & input_header_,
             }
         }
     }
+
+    for (const auto & workspace : workspaces)
+    {
+        if (workspace.window_function_impl)
+        {
+            if (!workspace.window_function_impl->checkWindowFrameType(this))
+            {
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported window frame type for function '{}'",
+                    workspace.aggregate_function->getName());
+            }
+        }
+
+    }
 }

 WindowTransform::~WindowTransform()
@@ -1609,6 +1628,34 @@ struct WindowFunctionHelpers
     {
         recurrent_detail::setValueToOutputColumn(transform, function_index, value);
     }
+
+    ALWAYS_INLINE static bool checkPartitionEnterFirstRow(const WindowTransform * transform) { return transform->current_row_number == 1; }
+
+    ALWAYS_INLINE static bool checkPartitionEnterLastRow(const WindowTransform * transform)
+    {
+        /// This is a fast check.
+        if (!transform->partition_ended)
+            return false;
+
+        auto current_row = transform->current_row;
+        /// checkPartitionEnterLastRow is called on each row, so advance current_row.row here as well.
+        current_row.row++;
+        const auto & partition_end_row = transform->partition_end;
+
+        /// The partition end is reached when one of the following holds:
+        /// - the current row is the partition end row,
+        /// - or the current row is the last row of all input.
+        if (current_row != partition_end_row)
+        {
+            /// When the current row is not the partition end row, we need to check whether it's the last
+            /// input row.
+            if (current_row.row < transform->blockRowsNumber(current_row))
+                return false;
+            if (partition_end_row.block != current_row.block + 1 || partition_end_row.row)
+                return false;
+        }
+        return true;
+    }
 };

 template
@@ -2058,8 +2105,6 @@ namespace
             const WindowTransform * transform,
             size_t function_index,
             const DataTypes & argument_types);
-
-        static void checkWindowFrameType(const WindowTransform * transform);
     };
 }

@@ -2080,6 +2125,29 @@ struct WindowFunctionNtile final : public StatefulWindowFunction<NtileState>

     bool allocatesMemoryInArena() const override { return false; }

+    bool checkWindowFrameType(const WindowTransform * transform) const override
+    {
+        if (transform->order_by_indices.empty())
+        {
+            LOG_ERROR(getLogger("WindowFunctionNtile"), "Window frame for 'ntile' function must have ORDER BY clause");
+            return false;
+        }
+
+        // We must wait for the partition end to get the total number of rows in this
+        // partition, so no block can be dropped out before the end of this partition.
+ bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded + && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded; + if (!is_frame_supported) + { + LOG_ERROR( + getLogger("WindowFunctionNtile"), + "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'"); + return false; + } + return true; + } + std::optional getDefaultFrame() const override { WindowFrame frame; @@ -2106,7 +2174,6 @@ namespace { if (!buckets) [[unlikely]] { - checkWindowFrameType(transform); const auto & current_block = transform->blockAt(transform->current_row); const auto & workspace = transform->workspaces[function_index]; const auto & arg_col = *current_block.original_input_columns[workspace.argument_column_indices[0]]; @@ -2128,7 +2195,7 @@ namespace } } // new partition - if (transform->current_row_number == 1) [[unlikely]] + if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform)) [[unlikely]] { current_partition_rows = 0; current_partition_inserted_row = 0; @@ -2137,25 +2204,9 @@ namespace current_partition_rows++; // Only do the action when we meet the last row in this partition. - if (!transform->partition_ended) + if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform)) return; - else - { - auto current_row = transform->current_row; - current_row.row++; - const auto & end_row = transform->partition_end; - if (current_row != end_row) - { - if (current_row.row < transform->blockRowsNumber(current_row)) - return; - if (end_row.block != current_row.block + 1 || end_row.row) - { - return; - } - // else, current_row is the last input row. - } - } auto bucket_capacity = current_partition_rows / buckets; auto capacity_diff = current_partition_rows - bucket_capacity * buckets; @@ -2193,23 +2244,115 @@ namespace bucket_num += 1; } } +} - void NtileState::checkWindowFrameType(const WindowTransform * transform) +namespace +{ +struct PercentRankState +{ + RowNumber start_row; + UInt64 current_partition_rows = 0; +}; +} + +struct WindowFunctionPercentRank final : public StatefulWindowFunction +{ +public: + WindowFunctionPercentRank(const std::string & name_, + const DataTypes & argument_types_, const Array & parameters_) + : StatefulWindowFunction(name_, argument_types_, parameters_, std::make_shared()) + {} + + bool allocatesMemoryInArena() const override { return false; } + + bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->order_by_indices.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for 'ntile' function must have ORDER BY clause"); + if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE + || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded + || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); + return false; + } + return true; + } - // We must wait all for the partition end and get the total rows number in this - // partition. So before the end of this partition, there is no any block could be - // dropped out. 
-        // dropped out.
-        bool is_frame_supported = transform->window_description.frame.begin_type == WindowFrame::BoundaryType::Unbounded
-            && transform->window_description.frame.end_type == WindowFrame::BoundaryType::Unbounded;
-        if (!is_frame_supported)
+    std::optional<WindowFrame> getDefaultFrame() const override
+    {
+        WindowFrame frame;
+        frame.type = WindowFrame::FrameType::RANGE;
+        frame.begin_type = WindowFrame::BoundaryType::Unbounded;
+        frame.end_type = WindowFrame::BoundaryType::Current;
+        return frame;
+    }
+
+    void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const override
+    {
+        auto & state = getWorkspaceState(transform, function_index);
+        if (WindowFunctionHelpers::checkPartitionEnterFirstRow(transform))
         {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window frame for function 'ntile' should be 'ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING'");
+            state.current_partition_rows = 0;
+            state.start_row = transform->current_row;
+        }
+
+        insertRankIntoColumn(transform, function_index);
+        state.current_partition_rows++;
+
+        if (!WindowFunctionHelpers::checkPartitionEnterLastRow(transform))
+        {
+            return;
+        }
+
+        UInt64 remaining_rows = state.current_partition_rows;
+        Float64 percent_rank_denominator = remaining_rows == 1 ? 1 : remaining_rows - 1;
+
+        while (remaining_rows > 0)
+        {
+            auto block_rows_number = transform->blockRowsNumber(state.start_row);
+            auto available_block_rows = block_rows_number - state.start_row.row;
+            if (available_block_rows <= remaining_rows)
+            {
+                /// This partition involves multiple blocks. Finish the current block and move on to the
+                /// next block.
+                auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
+                auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
+                for (size_t i = state.start_row.row; i < block_rows_number; ++i)
+                    data[i] = (data[i] - 1) / percent_rank_denominator;
+
+                state.start_row.block++;
+                state.start_row.row = 0;
+                remaining_rows -= available_block_rows;
+            }
+            else
+            {
+                /// The partition ends in the current block.
+                auto & to_column = *transform->blockAt(state.start_row).output_columns[function_index];
+                auto & data = assert_cast<ColumnFloat64 &>(to_column).getData();
+                for (size_t i = state.start_row.row, n = state.start_row.row + remaining_rows; i < n; ++i)
+                {
+                    data[i] = (data[i] - 1) / percent_rank_denominator;
+                }
+                state.start_row.row += remaining_rows;
+                remaining_rows = 0;
+            }
         }
     }
-}
+
+
+    inline PercentRankState & getWorkspaceState(const WindowTransform * transform, size_t function_index) const
+    {
+        const auto & workspace = transform->workspaces[function_index];
+        return getState(workspace);
+    }
+
+    inline void insertRankIntoColumn(const WindowTransform * transform, size_t function_index) const
+    {
+        auto & to_column = *transform->blockAt(transform->current_row).output_columns[function_index];
+        assert_cast<ColumnFloat64 &>(to_column).getData().push_back(static_cast<Float64>(transform->peer_group_start_row_number));
+    }
+};

 // ClickHouse-specific variant of lag/lead that respects the window frame.
template @@ -2582,6 +2725,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) parameters); }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("percent_rank", {[](const std::string & name, + const DataTypes & argument_types, const Array & parameters, const Settings *) + { + return std::make_shared(name, argument_types, + parameters); + }, properties}, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("row_number", {[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +24,9 @@ #include #include #include -#include "base/defines.h" -#include #include #include -#include namespace ProfileEvents @@ -111,7 +105,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -122,7 +116,6 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; - bool disable_deduplication_for_children; struct State { @@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageLiveView & live_view; @@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageWindowView & window_view; @@ -223,10 +216,45 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. 
+         *
+         * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables
+         * are involved.
+         *
+         * Example:
+         *
+         * landing -┬--> mv_1_1 --┬-> ds_1_1
+         *          |             |
+         *          └--> mv_1_2 --┘
+         *
+         */
+        auto insert_deduplication_token = insert_settings.insert_deduplication_token.value;
+
+        if (view_id.hasUUID())
+            insert_deduplication_token += "_" + toString(view_id.uuid);
+        else
+            insert_deduplication_token += "_" + view_id.getFullNameNotQuoted();
+
+        insert_context->setSetting("insert_deduplication_token", insert_deduplication_token);
+    }

     // Processing of blocks for MVs is done block by block, and there will
     // be no parallel reading after (plus it is not a costless operation)
@@ -333,13 +361,7 @@ std::optional<Chain> generateViewChain(
             insert_columns.emplace_back(column.name);
     }

-    InterpreterInsertQuery interpreter(
-        nullptr,
-        insert_context,
-        /* allow_materialized */ false,
-        /* no_squash */ false,
-        /* no_destination */ false,
-        /* async_isnert */ false);
+    InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false);

     /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false`
     bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type;
@@ -356,10 +378,6 @@ std::optional<Chain> generateViewChain(
                 table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
     }

-#ifdef ABORT_ON_LOGICAL_ERROR
-    out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Before squashing", out.getInputHeader()));
-#endif
-
     auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), current_thread, insert_context->getQuota());
     counting->setProcessListElement(insert_context->getProcessListElement());
     counting->setProgressCallback(insert_context->getProgressCallback());
@@ -402,19 +420,11 @@ std::optional<Chain> generateViewChain(

     if (type == QueryViewsLogElement::ViewType::MATERIALIZED)
     {
-#ifdef ABORT_ON_LOGICAL_ERROR
-        out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right after Inner query", out.getInputHeader()));
-#endif
-
         auto executing_inner_query = std::make_shared<ExecutingInnerQueryFromViewTransform>(
-            storage_header, views_data->views.back(), views_data, disable_deduplication_for_children);
+            storage_header, views_data->views.back(), views_data);
         executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms);

         out.addSource(std::move(executing_inner_query));
-
-#ifdef ABORT_ON_LOGICAL_ERROR
-        out.addSource(std::make_shared<DeduplicationToken::CheckTokenTransform>("Right before Inner query", out.getInputHeader()));
-#endif
     }

     return out;
@@ -455,7 +465,11 @@ Chain buildPushingToViewsChain(
      */
     result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout));

-    bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views;
+    /// If the "root" table deduplicates blocks, there is no need to deduplicate for the children.
+    /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to the small size of the inserted blocks.
+    bool disable_deduplication_for_children = false;
+    if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
+        disable_deduplication_for_children = !no_destination && storage->supportsDeduplication();

     auto table_id = storage->getStorageID();
     auto views = DatabaseCatalog::instance().getDependentViews(table_id);
@@ -546,25 +560,12 @@ Chain buildPushingToViewsChain(
         auto sink = std::make_shared<PushingToLiveViewSink>(live_view_header, *live_view, storage, context);
sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - else if (dynamic_cast(storage.get())) - { - auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); - metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); - sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -572,15 +573,8 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - - result_chain.addSource(std::make_shared(sink->getHeader())); - result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(storage_header)); - } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -596,7 +590,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & context = view.context; @@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - - if (!disable_deduplication_for_children) - { - String materialize_view_id = view.table_id.hasUUID() ? 
toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - else - { - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_, - bool disable_deduplication_for_children_) + std::shared_ptr views_data_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) - , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + state.emplace(process(block, view, *views_data)); } @@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk & chunk) +void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk & chunk) +void PushingToWindowViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index a9e5b1535c0..f0b2ead687e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index bb2be2c8ffb..2cd4dc42a83 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept 
+QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde return *this; } -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept -{ - append(std::move(rhs)); - return *this; -} - QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index 10f7f39ab09..ed9eb68b7ba 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,11 +20,8 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; - /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; - QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 5e37b7ae758..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); - if (result_chunk) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = Squashing::squash(squashing.flush()); - if (!result_chunk) - { - return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); - } + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } @@ -1875,7 +1875,7 @@ void TCPHandler::receiveQuery() #endif } - query_context = session->makeQueryContext(std::move(client_info)); + query_context = 
session->makeQueryContext(client_info); /// Sets the default database if it wasn't set earlier for the session context. if (is_interserver_mode && !default_database.empty()) @@ -1890,6 +1890,16 @@ void TCPHandler::receiveQuery() /// /// Settings /// + + /// FIXME: Remove when allow_experimental_analyzer becomes obsolete. + /// Analyzer became Beta in 24.3 and started to be enabled by default. + /// We have to disable it for ourselves to make sure we don't have different settings on + /// different servers. + if (query_kind == ClientInfo::QueryKind::SECONDARY_QUERY + && client_info.getVersionNumber() < VersionNumber(23, 3, 0) + && !passed_settings.allow_experimental_analyzer.changed) + passed_settings.set("allow_experimental_analyzer", false); + auto settings_changes = passed_settings.changes(); query_kind = query_context->getClientInfo().query_kind; if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 8791668cd89..e556bda2561 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk & chunk) +void DistributedSink::consume(Chunk chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk & chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp( - copy_query_ast, - job.local_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp( - query_ast, - context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 5b7396f2c6f..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f9bd8b6ff9..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter( - insert, - new_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination
*/ true, - /* async_isnert */ false); - + InterpreterInsertQuery interpreter(insert, new_context, false, true, true); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - kafka_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 9803fa0a160..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 82759e8a851..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include @@ -331,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch builder = interpreter.buildQueryPipeline(); } - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(chunk_infos.clone(), cur_header); - }); - - String live_view_id = live_view.getStorageID().hasUUID() ? 
toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(live_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 12d8e898347..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + void writeBlock(const Block & block, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index ff5214a5e51..bc8cb0ce69a 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,13 +377,7 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptrread("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); + + if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) + { + /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. + index_granularity.popMark(); + index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); + if (rows_count != index_granularity.getTotalRows()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", + name, index_granularity.getTotalRows(), rows_count); + } }; if (index_granularity.empty()) @@ -2328,26 +2339,21 @@ String IMergeTreeDataPart::getUniqueId() const return getDataPartStorage().getUniqueId(); } -UInt128 IMergeTreeDataPart::getPartBlockIDHash() const -{ - SipHash hash; - checksums.computeTotalChecksumDataOnly(hash); - return hash.get128(); -} - String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const { if (info.level != 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get block id for non zero level part {}", name); + SipHash hash; if (token.empty()) { - const auto hash_value = getPartBlockIDHash(); - return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); + checksums.computeTotalChecksumDataOnly(hash); + } + else + { + hash.update(token.data(), token.size()); } - SipHash hash; - hash.update(token.data(), token.size()); const auto hash_value = hash.get128(); return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 9fd481b0d8e..571f1389e10 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -210,7 +210,6 @@ public: /// Compute part block id for zero level part. Otherwise throws an exception. 
/// If token is not empty, block id is calculated based on it instead of block data - UInt128 getPartBlockIDHash() const; String getZeroLevelPartBlockID(std::string_view token) const; void setName(const String & new_name); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 6152da78395..c87f66b64f3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h index cef1d10e4ad..e817421f7d0 100644 --- a/src/Storages/MergeTree/IPartMetadataManager.h +++ b/src/Storages/MergeTree/IPartMetadataManager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 21d046c76f2..52d12c9db7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -154,7 +154,8 @@ void writeColumnSingleGranule( const SerializationPtr & serialization, ISerialization::OutputStreamGetter stream_getter, size_t from_row, - size_t number_of_rows) + size_t number_of_rows, + const MergeTreeWriterSettings & settings) { ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkSettings serialize_settings; @@ -162,6 +163,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); @@ -259,7 +261,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeColumnSingleGranule( block.getByName(name_and_type->name), getSerialization(name_and_type->name), - stream_getter, granule.start_row, granule.rows_to_write); + stream_getter, granule.start_row, granule.rows_to_write, settings); /// Each type always have at least one substream prev_stream->hashing_buf.next(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. 
+ Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 5ba326cef0c..74ea89a8864 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -433,6 +433,7 @@ void MergeTreeDataPartWriterWide::writeColumn( if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } @@ -441,6 +442,7 @@ void MergeTreeDataPartWriterWide::writeColumn( serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; for (const auto & granule : granules) { @@ -558,7 +560,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - throw Exception( + /// With fixed granularity we can have last mark with less rows than granularity + const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); + if (!index_granularity_info.fixed_index_granularity || !is_last_mark) + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", @@ -627,6 +632,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -821,7 +827,14 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// Without offset rows_written_in_last_mark = 0; } + + if (compute_granularity) + { + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } } + } } diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index a9125b4047e..04171656fcf 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -76,6 +76,7 @@ struct MergeTreeWriterSettings , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , 
low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) + , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) { } @@ -98,6 +99,7 @@ struct MergeTreeWriterSettings size_t low_cardinality_max_dictionary_size; bool low_cardinality_use_single_dictionary_for_part; + bool use_compact_variant_discriminators_serialization; }; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index a6298aab3d9..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -145,12 +145,8 @@ ChunkAndProgress MergeTreeSelectProcessor::read() ordered_columns.push_back(res.block.getByName(name).column); } - auto chunk = Chunk(ordered_columns, res.row_count); - if (add_part_level) - chunk.getChunkInfos().add(std::make_shared(task->getInfo().data_part->info.level)); - return ChunkAndProgress{ - .chunk = std::move(chunk), + .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 4f90f7131da..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,10 +264,7 @@ try ++it; } - auto result = Chunk(std::move(res_columns), rows_read); - if (add_part_level) - result.getChunkInfos().add(std::make_shared(data_part->info.level)); - return result; + return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); } } else diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 1f8d6abebd2..c0afd781c7e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d8cfce1ca99..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,27 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include +#include +#include +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { @@ -71,12 +58,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk & chunk) +void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk) size_t streams = 0; bool support_parallel_write = false; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - String block_dedup_token; - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } - if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + } + size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk) /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); - if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = block_dedup_token, + .block_dedup_token = std::move(block_dedup_token), .part_counters = std::move(part_counters), }); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - const Settings & settings = context->getSettingsRef(); - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - - if (settings.insert_deduplicate && deduplication_log) + if (deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 90976020d52..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,6 +36,7 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. 
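The `chunk_dedup_seqnum` member restored just above drives the per-block token sequencing in `MergeTreeSink::consume` (the `fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum)` call earlier in this hunk). A minimal sketch of that sequencing, with simplified types and `std::to_string` standing in for `fmt::format`:

```cpp
#include <iostream>
#include <string>

// Simplified stand-in for the token sequencing in MergeTreeSink::consume:
// within one INSERT, each block gets "<token>_<ordinal>", so distinct blocks
// of the same query produce distinct block ids, while an identical retried
// INSERT replays the same sequence and is deduplicated.
struct DedupTokenSequencer
{
    std::string base_token;        // settings.insert_deduplication_token
    unsigned long long seqnum = 0; // chunk_dedup_seqnum in the patch

    std::string next()
    {
        if (base_token.empty())
            return {}; // empty token: the block id falls back to a content hash
        return base_token + "_" + std::to_string(seqnum++);
    }
};

int main()
{
    DedupTokenSequencer seq{"my-token"};
    std::cout << seq.next() << '\n'; // my-token_0
    std::cout << seq.next() << '\n'; // my-token_1
}
```

When the token is empty, `getZeroLevelPartBlockID` (see the IMergeTreeDataPart hunk earlier) hashes the part checksums instead, so deduplication stays content-based.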
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3dbcb5e5bda..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); - if (squashed_chunk) + auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bbae054fbed..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,25 +1,21 @@ +#include +#include +#include +#include +#include #include "Common/Exception.h" #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include - +#include +#include +#include +#include #include -#include - namespace ProfileEvents { @@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = 
getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if constexpr (async_insert) { - const auto async_insert_info_ptr = chunk.getChunkInfos().get(); - if (async_insert_info_ptr) + const auto & chunk_info = chunk.getChunkInfo(); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - String block_dedup_token; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + if (deduplicate) { + String block_dedup_token; + /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. + + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } } profile_events_scope.reset(); @@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + /// If deduplicated data should not be inserted into MV, we need to set proper + /// value for `last_block_is_duplicate`, which is possible only after the part is committed. + /// Otherwise we can delay commit. + /// TODO: we can also delay commit if there are no MVs.
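A minimal sketch of the commit-timing rule enforced by the `if` that follows (simplified names, not the real ClickHouse interfaces): when dependent views must not receive deduplicated data, the part is committed eagerly so that `last_block_is_duplicate` is known before the pushing-to-views chain runs; when the views deduplicate for themselves, the commit can stay delayed.

```cpp
#include <iostream>

// Simplified model of the decision above. `dedup_in_dependent_mvs` stands for
// the deduplicate_blocks_in_dependent_materialized_views setting.
struct SinkModel
{
    bool dedup_in_dependent_mvs = false;
    bool last_block_is_duplicate = false;

    void finishDelayedChunk()
    {
        // commitPart() would run here; only after the commit do we learn
        // whether the block id was rejected as a duplicate (simulated).
        last_block_is_duplicate = true;
    }

    void afterBlockWritten()
    {
        if (!dedup_in_dependent_mvs)
            finishDelayedChunk(); // eager commit: views must skip duplicates
        else
            std::cout << "commit delayed (finished later, e.g. in onFinish())\n";
    }

    bool lastBlockIsDuplicate() const
    {
        // Mirrors the accessor added to ReplicatedMergeTreeSink.h below:
        // if the views deduplicate themselves, never report a duplicate here.
        return dedup_in_dependent_mvs ? false : last_block_is_duplicate;
    }
};

int main()
{
    SinkModel sink;
    sink.afterBlockWritten();
    std::cout << "duplicate=" << sink.lastBlockIsDuplicate() << '\n';
}
```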
+ if (!settings.deduplicate_blocks_in_dependent_materialized_views) + finishDelayedChunk(zookeeper); + ++num_blocks_processed; } @@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; + last_block_is_duplicate = false; + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; + last_block_is_duplicate = last_block_is_duplicate || deduplicated; + /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir] () + auto try_rollback_part_rename = [this, &part, &original_part_dir]() { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - const auto & settings = context->getSettingsRef(); - - ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( - settings.insert_keeper_fault_injection_probability, - settings.insert_keeper_fault_injection_seed, - storage.getZooKeeper(), - "ReplicatedMergeTreeSink::onFinish", - log); - - finishDelayedChunk(zookeeper); + auto zookeeper = storage.getZooKeeper(); + finishDelayedChunk(std::make_shared(zookeeper)); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..39623c20584 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,6 +59,16 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + /// For proper deduplication in MaterializedViews + bool lastBlockIsDuplicate() const override + { + /// If MV is responsible for deduplication, the block is not considered a duplicate.
+ if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; + } + struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -116,6 +126,7 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; + bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk & chunk) +void MessageQueueSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index 4a9248c6c4d..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,13 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - nats_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index f763a997bfb..91cc02de0f0 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_AZURE_BLOB_STORAGE @@ -40,72 +41,19 @@ const std::unordered_set optional_configuration_keys = { "storage_account_url", }; -using AzureClient = Azure::Storage::Blobs::BlobContainerClient; -using AzureClientPtr = std::unique_ptr; - -namespace -{ - bool isConnectionString(const std::string & candidate) - { - return !candidate.starts_with("http"); - } - - template - bool containerExists(T & blob_service_client, const std::string & container_name) - { - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client.ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - auto it = std::find_if( - containers_list.begin(), containers_list.end(), - [&](const auto & c) { return c.Name == container_name; }); - return it != containers_list.end(); - } -} - -Poco::URI StorageAzureConfiguration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return 
Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - void StorageAzureConfiguration::check(ContextPtr context) const { - context->getGlobalContext()->getRemoteHostFilter().checkURL(getConnectionURL()); + auto url = Poco::URI(connection_params.getConnectionURL()); + context->getGlobalContext()->getRemoteHostFilter().checkURL(url); Configuration::check(context); } StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) : Configuration(other) { - connection_url = other.connection_url; - is_connection_string = other.is_connection_string; - account_name = other.account_name; - account_key = other.account_key; - container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; -} - -AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) -{ - const auto & context_settings = context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; - settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; - settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; - settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; - return settings_ptr; + connection_params = other.connection_params; } StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const @@ -126,174 +74,59 @@ StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings( ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); - auto client = createClient(is_readonly, /* attempt_to_create_container */true); - auto settings = createSettings(context); + + auto settings = AzureBlobStorage::getRequestSettings(context->getSettingsRef()); + auto client = AzureBlobStorage::getContainerClient(connection_params, is_readonly); + return std::make_unique( - "AzureBlobStorage", std::move(client), std::move(settings), container, getConnectionURL().toString()); + "AzureBlobStorage", + connection_params.createForContainer(), + std::move(settings), + connection_params.getContainer(), + connection_params.getConnectionURL()); } -AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +static AzureBlobStorage::ConnectionParams getConnectionParams( + const String & connection_url, + const String & container_name, + const std::optional & account_name, + const std::optional & account_key, + const ContextPtr & local_context) { - using namespace Azure::Storage::Blobs; + AzureBlobStorage::ConnectionParams connection_params; + auto request_settings = AzureBlobStorage::getRequestSettings(local_context->getSettingsRef()); - AzureClientPtr result; - - if (is_connection_string) + if (account_name && account_key) { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - - if 
(attempt_to_create_container) - { - bool container_exists = containerExists(*blob_service_client, container); - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } + connection_params.endpoint.storage_account_url = connection_url; + connection_params.endpoint.container_name = container_name; + connection_params.auth_method = std::make_shared(*account_name, *account_key); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } else { - std::shared_ptr storage_shared_key_credential; - if (account_name.has_value() && account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*account_name, *account_key); - } - - std::unique_ptr blob_service_client; - size_t pos = connection_url.find('?'); - std::shared_ptr managed_identity_credential; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); - } - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, workload_identity_credential); - } - else - { - managed_identity_credential = std::make_shared(); - blob_service_client = std::make_unique(connection_url, managed_identity_credential); - } - } - - std::string final_url; - if (pos != std::string::npos) - { - auto url_without_sas = connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container - + connection_url.substr(pos); - } - else - final_url - = connection_url + (connection_url.back() == '/' ? 
"" : "/") + container; - - if (!attempt_to_create_container) - { - if (storage_shared_key_credential) - return std::make_unique(final_url, storage_shared_key_credential); - else - return std::make_unique(final_url, managed_identity_credential); - } - - bool container_exists = containerExists(*blob_service_client, container); - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); - } catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - { - /// If conneciton_url does not have '?', then its not SAS - if (pos == std::string::npos) - { - auto workload_identity_credential = std::make_shared(); - result = std::make_unique(final_url, workload_identity_credential); - } - else - result = std::make_unique(final_url, managed_identity_credential); - } - } - else - { - throw; - } - } - } + AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method); + connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ false); } - return result; + return connection_params; } - void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr) +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + String connection_url; + String container_name; + std::optional account_name; + std::optional account_key; + if (collection.has("connection_string")) - { connection_url = collection.get("connection_string"); - is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { + else if (collection.has("storage_account_url")) connection_url = collection.get("storage_account_url"); - is_connection_string = false; - } - container = collection.get("container"); + container_name = collection.get("container"); blob_path = collection.get("blob_path"); if (collection.has("account_name")) @@ -307,6 +140,7 @@ AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool a compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) @@ -324,12 +158,14 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, std::unordered_map engine_args_to_idx; - connection_url = checkAndGetLiteralArgument(engine_args[0], 
"connection_string/storage_account_url"); - is_connection_string = isConnectionString(connection_url); - container = checkAndGetLiteralArgument(engine_args[1], "container"); + String connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + String container_name = checkAndGetLiteralArgument(engine_args[1], "container"); blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + std::optional account_name; + std::optional account_key; + auto is_format_arg = [] (const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); @@ -386,7 +222,9 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); if (is_format_arg(sixth_arg)) + { format = sixth_arg; + } else { if (with_structure) @@ -428,6 +266,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } blobs_paths = {blob_path}; + connection_params = getConnectionParams(connection_url, container_name, account_name, account_key, context); } void StorageAzureConfiguration::addStructureAndFormatToArgs( diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index bbaa82c51ba..4e6bfbc0745 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -35,8 +35,8 @@ public: const Paths & getPaths() const override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getNamespace() const override { return container; } - String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } + String getNamespace() const override { return connection_params.getContainer(); } + String getDataSourceDescription() const override { return std::filesystem::path(connection_params.getConnectionURL()) / connection_params.getContainer(); } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; @@ -54,22 +54,9 @@ protected: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using AzureClientPtr = std::unique_ptr; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; std::string blob_path; std::vector blobs_paths; - - AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); - AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); - Poco::URI getConnectionURL() const; + AzureBlobStorage::ConnectionParams connection_params; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d2bdd0af302..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk & chunk) +void 
StorageObjectStorageSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6ab531bb21a..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 3c1c2f9bba1..6940f10cb91 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -65,7 +65,6 @@ StorageObjectStorageSource::StorageObjectStorageSource( CurrentMetrics::StorageObjectStorageThreadsActive, CurrentMetrics::StorageObjectStorageThreadsScheduled, 1/* max_threads */)) - , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) @@ -156,20 +155,20 @@ std::shared_ptr StorageObjectStorageSourc return iterator; } -void StorageObjectStorageSource::lazyInitialize(size_t processor) +void StorageObjectStorageSource::lazyInitialize() { if (initialized) return; - reader = createReader(processor); + reader = createReader(); if (reader) - reader_future = createReaderAsync(processor); + reader_future = createReaderAsync(); initialized = true; } Chunk StorageObjectStorageSource::generate() { - lazyInitialize(0); + lazyInitialize(); while (true) { @@ -259,27 +258,30 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader() { - const auto cache_key = getKeyForSchemaCache( - getUniqueStoragePathIdentifier(*configuration, object_info), - configuration->format, - format_settings, - getContext()); - - auto get_last_mod_time = [&]() -> std::optional - { - return object_info.metadata - ? 
std::optional(object_info.metadata->last_modified.epochTime()) - : std::nullopt; - }; - return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); + return createReader( + 0, file_iterator, configuration, object_storage, read_from_format_info, format_settings, + key_condition, getContext(), &schema_cache, log, max_block_size, max_parsing_threads, need_only_count); } -StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count) { ObjectInfoPtr object_info; - auto query_settings = configuration->getQuerySettings(getContext()); + auto query_settings = configuration->getQuerySettings(context_); do { @@ -300,9 +302,29 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::shared_ptr source; std::unique_ptr read_buf; + auto try_get_num_rows_from_cache = [&]() -> std::optional + { + if (!schema_cache) + return std::nullopt; + + const auto cache_key = getKeyForSchemaCache( + getUniqueStoragePathIdentifier(*configuration, *object_info), + configuration->format, + format_settings, + context_); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; + }; + return schema_cache->tryGetNumRows(cache_key, get_last_mod_time); + }; + std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(*object_info) + && context_->getSettingsRef().use_cache_for_count_from_files + ? try_get_num_rows_from_cache() : std::nullopt; if (num_rows_from_cache) @@ -327,14 +349,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); - read_buf = createReadBuffer(*object_info); + read_buf = createReadBuffer(*object_info, object_storage, context_, log); } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), + context_, max_block_size, format_settings, need_only_count ? 
1 : max_parsing_threads, @@ -343,20 +365,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade compression_method, need_only_count); - if (key_condition) - input_format->setKeyCondition(key_condition); + if (key_condition_) + input_format->setKeyCondition(key_condition_); if (need_only_count) input_format->needOnlyCount(); builder.init(Pipe(input_format)); - if (columns_desc.hasDefaults()) + if (read_from_format_info.columns_description.hasDefaults()) { builder.addSimpleTransform( [&](const Block & header) { - return std::make_shared(header, columns_desc, *input_format, getContext()); + return std::make_shared(header, read_from_format_info.columns_description, *input_format, context_); }); } @@ -379,21 +401,25 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); } -std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +std::future StorageObjectStorageSource::createReaderAsync() { - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); + return create_reader_scheduler([=, this] { return createReader(); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) +std::unique_ptr StorageObjectStorageSource::createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log) { const auto & object_size = object_info.metadata->size_bytes; - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + auto read_settings = context_->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool object_too_small = object_size <= 2 * context_->getSettings().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; /// User's object may change, don't cache it. diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index a9e28b93b5c..271b38fa75c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -76,7 +76,6 @@ protected: const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; - ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; @@ -117,13 +116,32 @@ protected: std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. 
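+    /// The overload below is static so that ObjectStorageQueueSource can create
+    /// readers directly (see ObjectStorageQueueSource::generateImpl) instead of
+    /// wrapping an internal StorageObjectStorageSource; key_condition_ and
+    /// schema_cache may be passed as nullptr when filter pushdown and the
+    /// num-rows cache are not needed.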
- ReaderHolder createReader(size_t processor = 0); - std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const ObjectInfo & object_info); + static ReaderHolder createReader( + size_t processor, + const std::shared_ptr & file_iterator, + const ConfigurationPtr & configuration, + const ObjectStoragePtr & object_storage, + const ReadFromFormatInfo & read_from_format_info, + const std::optional & format_settings, + const std::shared_ptr & key_condition_, + const ContextPtr & context_, + SchemaCache * schema_cache, + const LoggerPtr & log, + size_t max_block_size, + size_t max_parsing_threads, + bool need_only_count); + + ReaderHolder createReader(); + + std::future createReaderAsync(); + static std::unique_ptr createReadBuffer( + const ObjectInfo & object_info, + const ObjectStoragePtr & object_storage, + const ContextPtr & context_, + const LoggerPtr & log); void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); - void lazyInitialize(size_t processor); + void lazyInitialize(); }; class StorageObjectStorageSource::IIterator diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp index 52ee0c9f8ed..6fac519849d 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.cpp @@ -62,6 +62,11 @@ void ObjectStorageQueueIFileMetadata::FileStatus::onFailed(const std::string & e last_exception = exception; } +void ObjectStorageQueueIFileMetadata::FileStatus::updateState(State state_) +{ + state = state_; +} + std::string ObjectStorageQueueIFileMetadata::FileStatus::getException() const { std::lock_guard lock(last_exception_mutex); @@ -224,9 +229,14 @@ bool ObjectStorageQueueIFileMetadata::setProcessing() auto [success, file_state] = setProcessingImpl(); if (success) + { file_status->onProcessing(); + } else + { + LOG_TEST(log, "Updating state of {} from {} to {}", path, file_status->state.load(), file_state); file_status->updateState(file_state); + } LOG_TEST(log, "File {} has state `{}`: will {}process (processing id version: {})", path, file_state, success ? 
"" : "not ", diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h index f0e55c202a2..9bab9e7f075 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueIFileMetadata.h @@ -23,7 +23,7 @@ public: void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); - void updateState(State state_) { state = state_; } + void updateState(State state_); std::string getException() const; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 371a23f5a66..4d921003e04 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -359,41 +359,38 @@ ObjectStorageQueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t proc ObjectStorageQueueSource::ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_) - : ISource(header_) + : ISource(read_from_format_info_.source_header) , WithContext(context_) , name(std::move(name_)) , processor_id(processor_id_) - , action(action_) + , file_iterator(file_iterator_) + , configuration(configuration_) + , object_storage(object_storage_) + , read_from_format_info(read_from_format_info_) + , format_settings(format_settings_) + , queue_settings(queue_settings_) , files_metadata(files_metadata_) - , internal_source(std::move(internal_source_)) - , requested_virtual_columns(requested_virtual_columns_) + , max_block_size(max_block_size_) , shutdown_called(shutdown_called_) , table_is_being_dropped(table_is_being_dropped_) , system_queue_log(system_queue_log_) , storage_id(storage_id_) - , max_processed_files_before_commit(max_processed_files_before_commit_) - , max_processed_rows_before_commit(max_processed_rows_before_commit_) - , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) - , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) , commit_once_processed(commit_once_processed_) - , remove_file_func(remove_file_func_) , log(log_) { } @@ -403,21 +400,6 @@ String ObjectStorageQueueSource::getName() const return name; } -void ObjectStorageQueueSource::lazyInitialize(size_t processor) -{ - if (initialized) - return; - - LOG_TEST(log, "Initializing a new reader"); - - internal_source->lazyInitialize(processor); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - - initialized = true; -} - Chunk 
ObjectStorageQueueSource::generate() { Chunk chunk; @@ -442,19 +424,33 @@ Chunk ObjectStorageQueueSource::generate() Chunk ObjectStorageQueueSource::generateImpl() { - lazyInitialize(processor_id); - while (true) { if (!reader) { - LOG_TEST(log, "No reader"); - break; + if (shutdown_called) + { + LOG_TEST(log, "Shutdown called"); + break; + } + + const auto context = getContext(); + reader = StorageObjectStorageSource::createReader( + processor_id, file_iterator, configuration, object_storage, read_from_format_info, + format_settings, nullptr, context, nullptr, log, max_block_size, + context->getSettingsRef().max_parsing_threads.value, /* need_only_count */false); + + if (!reader) + { + LOG_TEST(log, "No reader"); + break; + } } const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); + const auto & path = reader.getObjectInfo()->getPath(); if (isCancelled()) { @@ -477,8 +473,6 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - const auto & path = reader.getObjectInfo()->getPath(); - if (shutdown_called) { LOG_TEST(log, "Shutdown called"); @@ -526,7 +520,7 @@ Chunk ObjectStorageQueueSource::generateImpl() total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( - chunk, requested_virtual_columns, + chunk, read_from_format_info.requested_virtual_columns, { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes @@ -545,9 +539,6 @@ Chunk ObjectStorageQueueSource::generateImpl() if (processed_rows_from_file == 0) { - auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); - chassert(file_iterator); - if (file_status->retries < file_metadata->getMaxTries()) file_iterator->returnForRetry(reader.getObjectInfo()); @@ -562,11 +553,13 @@ Chunk ObjectStorageQueueSource::generateImpl() file_status->setProcessingEndTime(); file_status.reset(); + reader = {}; processed_rows_from_file = 0; processed_files.push_back(file_metadata); - if (processed_files.size() == max_processed_files_before_commit) + if (queue_settings.max_processed_files_before_commit + && processed_files.size() == queue_settings.max_processed_files_before_commit) { LOG_TRACE(log, "Number of max processed files before commit reached " "(rows: {}, bytes: {}, files: {})", @@ -574,68 +567,30 @@ Chunk ObjectStorageQueueSource::generateImpl() break; } - bool rows_or_bytes_or_time_limit_reached = false; - if (max_processed_rows_before_commit - && total_processed_rows == max_processed_rows_before_commit) + if (queue_settings.max_processed_rows_before_commit + && total_processed_rows == queue_settings.max_processed_rows_before_commit) { LOG_TRACE(log, "Number of max processed rows before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processed_bytes_before_commit - && total_processed_bytes == max_processed_bytes_before_commit) + else if (queue_settings.max_processed_bytes_before_commit + && total_processed_bytes == queue_settings.max_processed_bytes_before_commit) { LOG_TRACE(log, "Number of max processed bytes before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; + break; } - else if (max_processing_time_sec_before_commit - && total_stopwatch.elapsedSeconds() >= 
max_processing_time_sec_before_commit) + else if (queue_settings.max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= queue_settings.max_processing_time_sec_before_commit) { LOG_TRACE(log, "Max processing time before commit reached " "(rows: {}, bytes: {}, files: {})", total_processed_rows, total_processed_bytes, processed_files.size()); - - rows_or_bytes_or_time_limit_reached = true; - } - - if (rows_or_bytes_or_time_limit_reached) - { - if (!reader_future.valid()) - break; - - LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " - "will process it despite the limit"); - } - - if (shutdown_called) - { - LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } - - chassert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - { - LOG_TEST(log, "Reader finished"); - break; - } - - file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - - if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); - } } return {}; @@ -679,12 +634,11 @@ void ObjectStorageQueueSource::commit(bool success, const std::string & exceptio void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { - switch (action) + switch (queue_settings.after_processing.value) { case ObjectStorageQueueAction::DELETE: { - assert(remove_file_func); - remove_file_func(path); + object_storage->removeObject(StoredObject(path)); break; } case ObjectStorageQueueAction::KEEP: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index ccd87e8a269..0f3d0ab2e92 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -21,7 +21,6 @@ class ObjectStorageQueueSource : public ISource, WithContext public: using Storage = StorageObjectStorage; using Source = StorageObjectStorageSource; - using RemoveFileFunc = std::function; using BucketHolderPtr = ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr; using BucketHolder = ObjectStorageQueueOrderedFileMetadata::BucketHolder; @@ -97,22 +96,20 @@ public: ObjectStorageQueueSource( String name_, size_t processor_id_, - const Block & header_, - std::unique_ptr internal_source_, + std::shared_ptr file_iterator_, + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const ReadFromFormatInfo & read_from_format_info_, + const std::optional & format_settings_, + const ObjectStorageQueueSettings & queue_settings_, std::shared_ptr files_metadata_, - const ObjectStorageQueueAction & action_, - RemoveFileFunc remove_file_func_, - const NamesAndTypesList & requested_virtual_columns_, ContextPtr context_, + size_t max_block_size_, const std::atomic & shutdown_called_, const std::atomic & table_is_being_dropped_, std::shared_ptr system_queue_log_, const StorageID & storage_id_, LoggerPtr log_, - size_t max_processed_files_before_commit_, - size_t max_processed_rows_before_commit_, - size_t max_processed_bytes_before_commit_, - size_t max_processing_time_sec_before_commit_, bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ 
-128,29 +125,27 @@ public: private: const String name; const size_t processor_id; - const ObjectStorageQueueAction action; + const std::shared_ptr file_iterator; + const ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + const ReadFromFormatInfo read_from_format_info; + const std::optional format_settings; + const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; - const NamesAndTypesList requested_virtual_columns; + const size_t max_block_size; + const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; const std::shared_ptr system_queue_log; const StorageID storage_id; - const size_t max_processed_files_before_commit; - const size_t max_processed_rows_before_commit; - const size_t max_processed_bytes_before_commit; - const size_t max_processing_time_sec_before_commit; const bool commit_once_processed; - RemoveFileFunc remove_file_func; LoggerPtr log; std::vector processed_files; std::vector failed_during_read_files; Source::ReaderHolder reader; - std::future reader_future; - std::atomic initialized{false}; size_t processed_rows_from_file = 0; size_t total_processed_rows = 0; @@ -165,8 +160,6 @@ private: ObjectStorageQueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); - - void lazyInitialize(size_t processor); }; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 0930a3a1f9c..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -352,43 +352,14 @@ std::shared_ptr StorageObjectStorageQueue::createSourc ContextPtr local_context, bool commit_once_processed) { - auto internal_source = std::make_unique( - getName(), - object_storage, - configuration, - info, - format_settings, - local_context, - max_block_size, - file_iterator, - local_context->getSettingsRef().max_download_threads, - false); - - auto file_deleter = [=, this](const std::string & path) mutable - { - object_storage->removeObject(StoredObject(path)); - }; - return std::make_shared( - getName(), - processor_id, - info.source_header, - std::move(internal_source), - files_metadata, - queue_settings->after_processing, - file_deleter, - info.requested_virtual_columns, - local_context, - shutdown_called, - table_is_being_dropped, + getName(), processor_id, + file_iterator, configuration, object_storage, + info, format_settings, + *queue_settings, files_metadata, + local_context, max_block_size, shutdown_called, table_is_being_dropped, getQueueLog(object_storage, local_context, *queue_settings), - getStorageID(), - log, - queue_settings->max_processed_files_before_commit, - queue_settings->max_processed_rows_before_commit, - queue_settings->max_processed_bytes_before_commit, - queue_settings->max_processing_time_sec_before_commit, - commit_once_processed); + getStorageID(), log, commit_once_processed); } bool StorageObjectStorageQueue::hasDependencies(const StorageID & table_id) @@ -483,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter( - insert, - queue_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); auto block_io = 
interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index ee2570756ed..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk & chunk) +void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(partition_index_to_chunk[partition_index]); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index fcd67556dc9..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 44479bd01e2..ba3cc6f58d0 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ true, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context, true); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..e4b19992151 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - rabbitmq_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ 
true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 4b5188ca9f2..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,10 +247,7 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. if (!chunks.empty()) - { - Chunk empty; - consume(empty); - } + consume({}); } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 64190c8c86f..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1f7f6939f40..c451cfd1bf5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk & chunk) +void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 2e1e0c7b429..011322df829 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3473166a080..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,8 +313,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b064fba223a..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,13 +1020,7 @@ void 
StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 67586985ce8..849fa5dbe0b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,13 +1050,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter( - new_query, - query_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(new_query, query_context); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3fb397c7b81..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1778,12 +1778,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onCancel() override diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp new file mode 100644 index 00000000000..6e8f425f8dc --- /dev/null +++ b/src/Storages/StorageFuzzQuery.cpp @@ -0,0 +1,169 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +ColumnPtr FuzzQuerySource::createColumn() +{ + auto column = ColumnString::create(); + ColumnString::Chars & data_to = column->getChars(); + ColumnString::Offsets & offsets_to = column->getOffsets(); + + offsets_to.resize(block_size); + IColumn::Offset offset = 0; + + auto fuzz_base = query; + size_t row_num = 0; + + while (row_num < block_size) + { + ASTPtr new_query = fuzz_base->clone(); + + auto base_before_fuzz = fuzz_base->formatForErrorMessage(); + fuzzer.fuzzMain(new_query); + auto fuzzed_text = new_query->formatForErrorMessage(); + + if (base_before_fuzz == fuzzed_text) + continue; + + /// AST is too long, will start from the original query. 
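+        /// (Resetting fuzz_base to the original query keeps mutations from
+        /// compounding on an oversized AST; no row is emitted for this iteration
+        /// and fuzzing restarts from the base query.)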
+ if (fuzzed_text.size() > config.max_query_length) + { + fuzz_base = query; + continue; + } + + IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; + data_to.resize(next_offset); + + std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); + + data_to[offset + fuzzed_text.size()] = 0; + offsets_to[row_num] = next_offset; + + offset = next_offset; + fuzz_base = new_query; + ++row_num; + } + + return column; +} + +StorageFuzzQuery::StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) + : IStorage(table_id_), config(config_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setComment(comment_); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageFuzzQuery::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + storage_snapshot->check(column_names); + + Pipes pipes; + pipes.reserve(num_streams); + + const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); + Block block_header; + for (const auto & name : column_names) + { + const auto & name_type = our_columns.get(name); + MutableColumnPtr column = name_type.type->createColumn(); + block_header.insert({std::move(column), name_type.type, name_type.name}); + } + + const char * begin = config.query.data(); + const char * end = begin + config.query.size(); + + ParserQuery parser(end, false); + auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + for (UInt64 i = 0; i < num_streams; ++i) + pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); + + return Pipe::unitePipes(std::move(pipes)); +} + +StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) +{ + StorageFuzzQuery::Configuration configuration{}; + + // Supported signatures: + // + // FuzzQuery(query) + // FuzzQuery(query, max_query_length) + // FuzzQuery(query, max_query_length, random_seed) + if (engine_args.empty() || engine_args.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); + configuration.query = std::move(first_arg); + + if (engine_args.size() >= 2) + { + const auto & literal = engine_args[1]->as(); + if (!literal.value.isNull()) + configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); + } + + if (engine_args.size() == 3) + { + const auto & literal = engine_args[2]->as(); + if (!literal.value.isNull()) + configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); + } + + return configuration; +} + +void registerStorageFuzzQuery(StorageFactory & factory) +{ + factory.registerStorage( + "FuzzQuery", + [](const StorageFactory::Arguments & args) -> std::shared_ptr + { + ASTs & engine_args = args.engine_args; + + if (engine_args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); + + StorageFuzzQuery::Configuration configuration =
StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); + + for (const auto& col : args.columns) + if (col.type->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); + + return std::make_shared(args.table_id, args.columns, args.comment, configuration); + }); +} + +} diff --git a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h new file mode 100644 index 00000000000..125ef960e74 --- /dev/null +++ b/src/Storages/StorageFuzzQuery.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include + +#include "config.h" + +namespace DB +{ + +class NamedCollection; + +class StorageFuzzQuery final : public IStorage +{ +public: + struct Configuration : public StatelessTableEngineConfiguration + { + String query; + UInt64 max_query_length = 500; + UInt64 random_seed = randomSeed(); + }; + + StorageFuzzQuery( + const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); + + std::string getName() const override { return "FuzzQuery"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + +private: + const Configuration config; +}; + + +class FuzzQuerySource : public ISource +{ +public: + FuzzQuerySource( + UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) + : ISource(block_header_) + , block_size(block_size_) + , block_header(std::move(block_header_)) + , config(config_) + , query(query_) + , fuzzer(config_.random_seed) + { + } + + String getName() const override { return "FuzzQuery"; } + +protected: + Chunk generate() override + { + Columns columns; + columns.reserve(block_header.columns()); + for (const auto & col : block_header) + { + chassert(col.type->getTypeId() == TypeIndex::String); + columns.emplace_back(createColumn()); + } + + return {std::move(columns), block_size}; + } + +private: + ColumnPtr createColumn(); + + UInt64 block_size; + Block block_header; + + StorageFuzzQuery::Configuration config; + ASTPtr query; + + QueryFuzzer fuzzer; +}; + +} diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index c80e799a92b..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,10 +1248,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); - } + sink->consume(Chunk{block.getColumns(), block.rows()}); sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 463694c63aa..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ 
b/src/Storages/StorageLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,6 +21,7 @@ #include #include +#include "StorageLogSettings.h" #include #include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk & chunk) +void LogSink::consume(Chunk chunk) { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 316f398b476..ec1559b71a4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -161,6 +161,7 @@ StorageMaterializedView::StorageMaterializedView( manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); manual_create_query->uuid = query.to_inner_uuid; + manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b1bd7053c2e..f69c4adb552 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index e0818fafae9..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -106,12 +107,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 2a8a7bd2ee7..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index cdfeab62b58..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto 
block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 1a275320f43..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk & chunk) +void RedisSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,8 +567,7 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - Chunk chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0d094c15880..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk & chunk) +void SetOrJoinSink::consume(Chunk chunk) { - Block block = getHeader().cloneWithColumns(chunk.getColumns()); + Block block = getHeader().cloneWithColumns(chunk.detachColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 9b6d9f041e1..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -226,9 +226,9 @@ public: } } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 90e05c44e31..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk & chunk) +void StorageURLSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onCancel() diff --git 
a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c9c606de049..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e36247103c7..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,13 +689,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter( - insert, - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, getContext()); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1420,7 +1413,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) + StorageWindowView & window_view, const Block & block, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1435,7 +1428,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1480,10 +1473,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header_) + pipe.addSimpleTransform([&](const Block & header) { return 
std::make_shared<FilterTransform>(
-                header_, std::make_shared<ExpressionActions>(filter_expression),
+                header, std::make_shared<ExpressionActions>(filter_expression),
                 filter_function->getColumnName(), true);
         });
     }
@@ -1538,30 +1531,6 @@ void StorageWindowView::writeIntoWindowView(
         QueryProcessingStage::WithMergeableState);

     builder = select_block.buildQueryPipeline();
-
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        // Can't move chunk_infos here, that function could be called several times
-        return std::make_shared(chunk_infos.clone(), stream_header);
-    });
-
-    String window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted();
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        return std::make_shared(window_view_id, stream_header);
-    });
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        return std::make_shared(stream_header);
-    });
-
-#ifdef ABORT_ON_LOGICAL_ERROR
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header);
-    });
-#endif
-
     builder.addSimpleTransform([&](const Block & current_header)
     {
         return std::make_shared<WatermarkTransform>(
@@ -1601,13 +1570,6 @@
             lateness_upper_bound);
     });

-#ifdef ABORT_ON_LOGICAL_ERROR
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header);
-    });
-#endif
-
     auto inner_table = window_view.getInnerTable();
     auto lock = inner_table->lockForShare(
         local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
@@ -1624,16 +1586,9 @@
         auto convert_actions = std::make_shared<ExpressionActions>(
             convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes));

-        builder.addSimpleTransform([&](const Block & header_) { return std::make_shared<ExpressionTransform>(header_, convert_actions); });
+        builder.addSimpleTransform([&](const Block & header) { return std::make_shared<ExpressionTransform>(header, convert_actions); });
     }

-#ifdef ABORT_ON_LOGICAL_ERROR
-    builder.addSimpleTransform([&](const Block & stream_header)
-    {
-        return std::make_shared("StorageWindowView: Before out", stream_header);
-    });
-#endif
-
     builder.addChain(Chain(std::move(output)));
     builder.setSinks([&](const Block & cur_header, Pipe::StreamType)
     {
diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h
index 14ac65091d3..f79867df424 100644
--- a/src/Storages/WindowView/StorageWindowView.h
+++ b/src/Storages/WindowView/StorageWindowView.h
@@ -166,7 +166,7 @@ public:

     BlockIO populate();

-    static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context);
+    static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context);

     ASTPtr getMergeableQuery() const { return mergeable_query->clone(); }

diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index ed378169381..84ba92bba00 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node,

     size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows;
     size_t min_block_size_bytes = 
mutable_context->getSettingsRef().min_external_table_block_size_bytes;

-    auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes);
+    auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes);

     builder->resize(1);
     builder->addTransform(std::move(squashing));
diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp
index 628e5a85437..9f849052071 100644
--- a/src/Storages/registerStorages.cpp
+++ b/src/Storages/registerStorages.cpp
@@ -26,6 +26,7 @@ void registerStorageGenerateRandom(StorageFactory & factory);
 void registerStorageExecutable(StorageFactory & factory);
 void registerStorageWindowView(StorageFactory & factory);
 void registerStorageLoop(StorageFactory & factory);
+void registerStorageFuzzQuery(StorageFactory & factory);

 #if USE_RAPIDJSON || USE_SIMDJSON
 void registerStorageFuzzJSON(StorageFactory & factory);
 #endif
@@ -123,6 +124,7 @@ void registerStorages()
     registerStorageExecutable(factory);
     registerStorageWindowView(factory);
     registerStorageLoop(factory);
+    registerStorageFuzzQuery(factory);

 #if USE_RAPIDJSON || USE_SIMDJSON
     registerStorageFuzzJSON(factory);
 #endif
diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp
new file mode 100644
index 00000000000..224f6666556
--- /dev/null
+++ b/src/TableFunctions/TableFunctionFuzzQuery.cpp
@@ -0,0 +1,54 @@
+#include
+
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context)
+{
+    ASTs & args_func = ast_function->children;
+
+    if (args_func.size() != 1)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName());
+
+    auto args = args_func.at(0)->children;
+    configuration = StorageFuzzQuery::getConfiguration(args, context);
+}
+
+StoragePtr TableFunctionFuzzQuery::executeImpl(
+    const ASTPtr & /*ast_function*/,
+    ContextPtr context,
+    const std::string & table_name,
+    ColumnsDescription /*cached_columns*/,
+    bool is_insert_query) const
+{
+    ColumnsDescription columns = getActualTableStructure(context, is_insert_query);
+    auto res = std::make_shared<StorageFuzzQuery>(
+        StorageID(getDatabaseName(), table_name),
+        columns,
+        /* comment */ String{},
+        configuration);
+    res->startup();
+    return res;
+}
+
+void registerTableFunctionFuzzQuery(TableFunctionFactory & factory)
+{
+    factory.registerFunction<TableFunctionFuzzQuery>(
+        {.documentation
+         = {.description = "Perturbs a query string with random variations.",
+            .returned_value = "A table object with a single column containing perturbed query strings."},
+         .allow_readonly = true});
+}
+
+}
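For orientation, this is roughly how the new table function could be exercised end to end. A minimal, hedged sketch, assuming a started integration-test `node` (the `helpers.cluster` API used by the tests later in this diff); the single string argument follows the `parseArguments` code above, and the need for a LIMIT is an assumption based on the storage producing a stream of variants:

```python
# Hedged sketch, not part of this PR: smoke-test fuzzQuery via the test helpers.
def check_fuzz_query(node):
    out = node.query(
        "SELECT query FROM fuzzQuery('SELECT 1 AS x FROM numbers(10)') LIMIT 5"
    )
    variants = [line for line in out.strip().split("\n") if line]
    assert len(variants) == 5  # one perturbed query string per row
```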
diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h
new file mode 100644
index 00000000000..22d10341c4d
--- /dev/null
+++ b/src/TableFunctions/TableFunctionFuzzQuery.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+#include "config.h"
+
+namespace DB
+{
+
+class TableFunctionFuzzQuery : public ITableFunction
+{
+public:
+    static constexpr auto name = "fuzzQuery";
+    std::string getName() const override { return name; }
+
+    void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
+
+    ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override
+    {
+        return ColumnsDescription{{"query", std::make_shared<DataTypeString>()}};
+    }
+
+private:
+    StoragePtr executeImpl(
+        const ASTPtr & ast_function,
+        ContextPtr context,
+        const std::string & table_name,
+        ColumnsDescription cached_columns,
+        bool is_insert_query) const override;
+
+    const char * getStorageTypeName() const override { return "fuzzQuery"; }
+
+    String source;
+    std::optional random_seed;
+    StorageFuzzQuery::Configuration configuration;
+};
+
+}
diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp
index ca4913898f9..a6c90872f12 100644
--- a/src/TableFunctions/registerTableFunctions.cpp
+++ b/src/TableFunctions/registerTableFunctions.cpp
@@ -26,6 +26,7 @@ void registerTableFunctions()
     registerTableFunctionMongoDB(factory);
     registerTableFunctionRedis(factory);
     registerTableFunctionMergeTreeIndex(factory);
+    registerTableFunctionFuzzQuery(factory);
 #if USE_RAPIDJSON || USE_SIMDJSON
     registerTableFunctionFuzzJSON(factory);
 #endif
diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h
index efde4d6dcdc..2a8864a9bfd 100644
--- a/src/TableFunctions/registerTableFunctions.h
+++ b/src/TableFunctions/registerTableFunctions.h
@@ -23,6 +23,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory);
 void registerTableFunctionMongoDB(TableFunctionFactory & factory);
 void registerTableFunctionRedis(TableFunctionFactory & factory);
 void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory);
+void registerTableFunctionFuzzQuery(TableFunctionFactory & factory);
 #if USE_RAPIDJSON || USE_SIMDJSON
 void registerTableFunctionFuzzJSON(TableFunctionFactory & factory);
 #endif
diff --git a/src/configure_config.cmake b/src/configure_config.cmake
index a3f6dae4b87..75f61baa854 100644
--- a/src/configure_config.cmake
+++ b/src/configure_config.cmake
@@ -170,5 +170,8 @@ endif()
 if (TARGET ch_contrib::pocketfft)
     set(USE_POCKETFFT 1)
 endif()
+if (TARGET ch_contrib::prometheus_protobufs)
+    set(USE_PROMETHEUS_PROTOBUFS 1)
+endif()

 set(SOURCE_DIR ${PROJECT_SOURCE_DIR})
diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py
index 87f721cfde7..21f16d995a4 100755
--- a/tests/ci/integration_tests_runner.py
+++ b/tests/ci/integration_tests_runner.py
@@ -434,7 +434,14 @@ class ClickhouseIntegrationTestsRunner:
             "Getting all tests to the file %s with cmd: \n%s", out_file_full, cmd
         )
         with open(out_file_full, "wb") as ofd:
-            subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd)
+            try:
+                subprocess.check_call(cmd, shell=True, stdout=ofd, stderr=ofd)
+            except subprocess.CalledProcessError as ex:
+                print("ERROR: Setting test plan failed. Output:")
+                with open(out_file_full, "r", encoding="utf-8") as file:
+                    for line in file:
+                        print("    " + line, end="")
+                raise ex

         all_tests = set()
         with open(out_file_full, "r", encoding="utf-8") as all_tests_fd:
diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py
index 8251ccbaf38..1b71231f820 100644
--- a/tests/ci/sync_pr.py
+++ b/tests/ci/sync_pr.py
@@ -101,23 +101,20 @@ def main():

     assert pr_info.merged_pr, "BUG. 
merged PR number could not been determined"

     prs = gh.get_pulls_from_search(
-        query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr",
+        query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr is:open",
         repo="ClickHouse/clickhouse-private",
     )
-    sync_pr = None
-
     if len(prs) > 1:
         print(f"WARNING: More than one PR found [{prs}] - exiting")
     elif len(prs) == 0:
         print("WARNING: No Sync PR found")
     else:
         sync_pr = prs[0]
-
-    if args.merge:
-        merge_sync_pr(gh, sync_pr)
-    elif args.status:
-        set_sync_status(gh, pr_info, sync_pr)
+        if args.merge:
+            merge_sync_pr(gh, sync_pr)
+        elif args.status:
+            set_sync_status(gh, pr_info, sync_pr)


 if __name__ == "__main__":
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 8486e3a885f..8e7002af889 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -34,10 +34,8 @@ import urllib.parse
 # for crc32
 import zlib
 from argparse import ArgumentParser
-from contextlib import closing
 from datetime import datetime, timedelta
 from errno import ESRCH
-from queue import Full
 from subprocess import PIPE, Popen
 from time import sleep, time
 from typing import Dict, List, Optional, Set, Tuple, Union
@@ -69,7 +67,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"]

 VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$"

-TEST_MAX_RUN_TIME_IN_SECONDS = 120
+TEST_MAX_RUN_TIME_IN_SECONDS = 180


 class SharedEngineReplacer:
@@ -360,37 +358,12 @@ def clickhouse_execute_json(
     return rows


-class Terminated(KeyboardInterrupt):
-    pass
-
-
-def signal_handler(sig, frame):
-    raise Terminated(f"Terminated with {sig} signal")
-
-
 def stop_tests():
-    global stop_tests_triggered_lock
-    global stop_tests_triggered
-    global restarted_tests
-
-    with stop_tests_triggered_lock:
-        print("Stopping tests")
-        if not stop_tests_triggered.is_set():
-            stop_tests_triggered.set()
-
-            # materialize multiprocessing.Manager().list() object before
-            # sending SIGTERM since this object is a proxy, that requires
-            # communicating with manager thread, but after SIGTERM will be
-            # send, this thread will die, and you will get
-            # ConnectionRefusedError error for any access to "restarted_tests"
-            # variable.
-            restarted_tests = [*restarted_tests]
-
-    # send signal to all processes in group to avoid hung check triggering
-    # (to avoid terminating clickhouse-test itself, the signal should be ignored)
-    signal.signal(signal.SIGTERM, signal.SIG_IGN)
-    os.killpg(os.getpgid(os.getpid()), signal.SIGTERM)
-    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+    # send signal to all processes in group to avoid hung check triggering
+    # (to avoid terminating clickhouse-test itself, the signal should be ignored)
+    signal.signal(signal.SIGTERM, signal.SIG_IGN)
+    os.killpg(os.getpgid(os.getpid()), signal.SIGTERM)
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)


 def get_db_engine(args, database_name):
@@ -2071,13 +2044,18 @@ class TestSuite:
 stop_time = None
 exit_code = None
 server_died = None
-stop_tests_triggered_lock = None
-stop_tests_triggered = None
-queue = None
 multiprocessing_manager = None
 restarted_tests = None


+class ServerDied(Exception):
+    pass
+
+
+class GlobalTimeout(Exception):
+    pass
+
+
 def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]):
     all_tests, num_tests, test_suite = all_tests_with_params
     global stop_time
@@ -2132,24 +2110,19 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]):
     print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n")

     while True:
-        if is_concurrent:
-            case = queue.get(timeout=args.timeout * 1.1)
-            if not case:
-                break
+        if all_tests:
+            case = all_tests.pop(0)
         else:
-            if all_tests:
-                case = all_tests.pop(0)
-            else:
-                break
+            break

         if server_died.is_set():
             stop_tests()
-            break
+            raise ServerDied("Server died")

         if stop_time and time() > stop_time:
             print("\nStop tests run because global time limit is exceeded.\n")
             stop_tests()
-            break
+            raise GlobalTimeout("Stop tests run because global time limit is exceeded")

         test_case = TestCase(test_suite, case, args, is_concurrent)

@@ -2192,7 +2165,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]):
                     failures_chain += 1
                     if test_result.reason == FailureReason.SERVER_DIED:
                         server_died.set()
-                        stop_tests()
                 elif test_result.status == TestStatus.SKIPPED:
                     skipped_total += 1

@@ -2203,7 +2175,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]):

                 if failures_chain >= args.max_failures_chain:
                     stop_tests()
-                    break
+                    raise ServerDied("Max failures chain")

     if failures_total > 0:
         print(
@@ -2400,7 +2372,7 @@ def extract_key(key: str) -> str:
     )[1]


-def do_run_tests(jobs, test_suite: TestSuite, parallel):
+def do_run_tests(jobs, test_suite: TestSuite):
     if jobs > 1 and len(test_suite.parallel_tests) > 0:
         print(
             "Found",
@@ -2409,19 +2381,8 @@
             len(test_suite.sequential_tests),
             "sequential tests",
         )
-        run_n, run_total = parallel.split("/")
-        run_n = float(run_n)
-        run_total = float(run_total)
         tests_n = len(test_suite.parallel_tests)
-        run_total = min(run_total, tests_n)
-        jobs = min(jobs, tests_n)
-        run_total = max(jobs, run_total)
-
-        batch_size = max(1, len(test_suite.parallel_tests) // jobs)
-        parallel_tests_array = []
-        for _ in range(jobs):
-            parallel_tests_array.append((None, batch_size, test_suite))

         # If we don't do random shuffling then there will be always
         # nearly the same groups of test suites running concurrently.
@@ -2434,25 +2395,21 @@
         # of failures will be nearly the same for all tests from the group.
        random.shuffle(test_suite.parallel_tests)

+        batch_size = max(1, len(test_suite.parallel_tests) // jobs)
+        parallel_tests_array = []
+        for job in range(jobs):
+            range_ = job * batch_size, job * batch_size + batch_size
+            batch = test_suite.parallel_tests[range_[0] : range_[1]]
+            parallel_tests_array.append((batch, batch_size, test_suite))
+
         try:
-            with closing(multiprocessing.Pool(processes=jobs)) as pool:
-                pool.map_async(run_tests_array, parallel_tests_array)
-
-                for suit in test_suite.parallel_tests:
-                    queue.put(suit, timeout=args.timeout * 1.1)
-
-                for _ in range(jobs):
-                    queue.put(None, timeout=args.timeout * 1.1)
-
-                queue.close()
-        except Full:
-            print(
-                "Couldn't put test to the queue within timeout. Server probably hung."
-            )
-            print_stacktraces()
-            queue.close()
-
-        pool.join()
+            with multiprocessing.Pool(processes=jobs) as pool:
+                future = pool.map_async(run_tests_array, parallel_tests_array)
+                future.wait()
+        finally:
+            pool.terminate()
+            pool.close()
+            pool.join()

         run_tests_array(
             (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite)
@@ -2817,7 +2774,7 @@ def main(args):
         test_suite.cloud_skip_list = cloud_skip_list
         test_suite.private_skip_list = private_skip_list

-        total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel)
+        total_tests_run += do_run_tests(args.jobs, test_suite)

         if server_died.is_set():
             exit_code.value = 1
@@ -3284,9 +3241,6 @@ if __name__ == "__main__":
     stop_time = None
     exit_code = multiprocessing.Value("i", 0)
     server_died = multiprocessing.Event()
-    stop_tests_triggered_lock = multiprocessing.Lock()
-    stop_tests_triggered = multiprocessing.Event()
-    queue = multiprocessing.Queue(maxsize=1)
     multiprocessing_manager = multiprocessing.Manager()
     restarted_tests = multiprocessing_manager.list()

@@ -3294,9 +3248,6 @@ if __name__ == "__main__":
     # infinite tests processes left
     # (new process group is required to avoid killing some parent processes)
     os.setpgid(0, 0)
-    signal.signal(signal.SIGTERM, signal_handler)
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGHUP, signal_handler)

     try:
         args = parse_args()
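The change above replaces the old queue-based scheduling with up-front batching. The standalone sketch below restates the new scheme in isolation; the names are illustrative only, not the actual clickhouse-test API, and it keeps remainder tests by stepping through the whole list rather than fixing the number of batches:

```python
import multiprocessing
import random


def run_batch(batch):
    # stand-in for run_tests_array; receives one contiguous slice of tests
    for test in batch:
        print(f"running {test}")


def run_in_batches(tests, jobs):
    # shuffle first so each run gets different groups of tests per worker
    random.shuffle(tests)
    batch_size = max(1, len(tests) // jobs)
    batches = [tests[i : i + batch_size] for i in range(0, len(tests), batch_size)]
    pool = multiprocessing.Pool(processes=jobs)
    try:
        pool.map_async(run_batch, batches).wait()
    finally:
        # terminate before join so stuck workers cannot hang the runner
        pool.terminate()
        pool.join()


if __name__ == "__main__":
    run_in_batches([f"test_{i:05d}" for i in range(10)], jobs=3)
```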
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 544b06cca1b..34f5c28fef8 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -73,7 +73,7 @@ CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.lo
 # Minimum version we use in integration tests to check compatibility with old releases
 # Keep in mind that we only support upgrading between releases that are at most 1 year different.
 # This means that this minimum need to be, at least, 1 year older than the current release
-CLICKHOUSE_CI_MIN_TESTED_VERSION = "22.8"
+CLICKHOUSE_CI_MIN_TESTED_VERSION = "23.3"


 # to create docker-compose env file
diff --git a/tests/integration/test_analyzer_compatibility/__init__.py b/tests/integration/test_analyzer_compatibility/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml
new file mode 100644
index 00000000000..0a50dab7fd3
--- /dev/null
+++ b/tests/integration/test_analyzer_compatibility/configs/remote_servers.xml
@@ -0,0 +1,17 @@
+<clickhouse>
+    <remote_servers>
+        <test_cluster_mixed>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>current</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>backward</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster_mixed>
+    </remote_servers>
+</clickhouse>
diff --git a/tests/integration/test_analyzer_compatibility/test.py b/tests/integration/test_analyzer_compatibility/test.py
new file mode 100644
index 00000000000..d4ded420c61
--- /dev/null
+++ b/tests/integration/test_analyzer_compatibility/test.py
@@ -0,0 +1,100 @@
+import uuid
+
+import pytest
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import TSV
+
+CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT = "24.2"
+
+cluster = ClickHouseCluster(__file__)
+# Here analyzer is enabled by default
+current = cluster.add_instance(
+    "current",
+    main_configs=["configs/remote_servers.xml"],
+)
+# Here analyzer is disabled by default
+backward = cluster.add_instance(
+    "backward",
+    use_old_analyzer=True,
+    main_configs=["configs/remote_servers.xml"],
+    image="clickhouse/clickhouse-server",
+    tag=CLICKHOUSE_MAX_VERSION_WITH_ANALYZER_DISABLED_BY_DEFAULT,
+    with_installed_binary=True,
+)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_two_new_versions(start_cluster):
+    # Two new versions (both know about the analyzer)
+    # One has it enabled by default, the other one - disabled.
+ + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + query_id = str(uuid.uuid4()) + current.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables);", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + current.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "true"], ["current", "true"]]) + ) + + # Should be enabled everywhere + analyzer_enabled = current.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("1") + + query_id = str(uuid.uuid4()) + backward.query( + "SELECT * FROM clusterAllReplicas('test_cluster_mixed', system.tables)", + query_id=query_id, + ) + + current.query("SYSTEM FLUSH LOGS") + backward.query("SYSTEM FLUSH LOGS") + + assert ( + backward.query( + """ +SELECT hostname() AS h, getSetting('allow_experimental_analyzer') +FROM clusterAllReplicas('test_cluster_mixed', system.one) +ORDER BY h;""" + ) + == TSV([["backward", "false"], ["current", "false"]]) + ) + + # Should be disabled everywhere + analyzer_enabled = backward.query( + f""" +SELECT +DISTINCT Settings['allow_experimental_analyzer'] +FROM clusterAllReplicas('test_cluster_mixed', system.query_log) +WHERE initial_query_id = '{query_id}';""" + ) + + assert TSV(analyzer_enabled) == TSV("0") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 758dda655da..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -130,10 +130,13 @@ def test_string_functions(start_cluster): functions = map(lambda x: x.strip(), functions) excludes = [ + # The argument of this function is not a seed, but an arbitrary expression needed for bypassing common subexpression elimination. "rand", "rand64", "randConstant", + "randCanonical", "generateUUIDv4", + "generateULID", # Syntax error otherwise "position", "substring", @@ -153,6 +156,18 @@ def test_string_functions(start_cluster): "tryBase64Decode", # Removed in 23.9 "meiliMatch", + # These functions require more than one argument. + "parseDateTimeInJodaSyntaxOrZero", + "parseDateTimeInJodaSyntaxOrNull", + "parseDateTimeOrNull", + "parseDateTimeOrZero", + "parseDateTime", + # The argument is effectively a disk name (and we don't have one with name foo) + "filesystemUnreserved", + "filesystemCapacity", + "filesystemAvailable", + # Exclude it for now. Looks like the result depends on the build type. + "farmHash64", ] functions = filter(lambda x: x not in excludes, functions) @@ -205,6 +220,9 @@ def test_string_functions(start_cluster): # Function X takes exactly one parameter: # The function 'X' can only be used as a window function "BAD_ARGUMENTS", + # String foo is obviously not a valid IP address. 
+ "CANNOT_PARSE_IPV4", + "CANNOT_PARSE_IPV6", ] if any(map(lambda x: x in error_message, allowed_errors)): logging.info("Skipping %s", function) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 97d5da787cd..56ea5c8846a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -9,7 +9,9 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "disks_app_test", main_configs=["config.xml"], with_minio=True + "disks_app_test", + main_configs=["config.xml"], + with_minio=True, ) cluster.start() @@ -47,12 +49,18 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = out.split("\n") + disks = list( + sorted( + map( + lambda x: x.split(":")[0], filter(lambda x: len(x) > 1, out.split("\n")) + ) + ) + ) - assert disks[0] == "default" and disks[1] == "test1" and disks[2] == "test2" + assert disks[:4] == ["default", "local", "test1", "test2"] def test_disks_app_func_ls(started_cluster): @@ -61,7 +69,15 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -75,9 +91,8 @@ def test_disks_app_func_ls(started_cluster): "--save-logs", "--disk", "test1", - "list", - ".", - "--recursive", + "--query", + "list . --recursive", ] ) @@ -102,8 +117,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "write", - "path1", + "--query", + "'write path1'", ] ), ] @@ -113,18 +128,21 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "copy", - "--disk-from", - "test1", - "--disk-to", - "test2", - ".", - ".", + "--query", + "copy --recursive --disk-from test1 --disk-to test2 . .", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" in out @@ -136,8 +154,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path1", + "--query", + "remove path1", ] ) @@ -148,21 +166,37 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "remove", - "path1", + "--query", + "remove path1", ] ) # alesapin: Why we need list one more time? 
# kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) assert "path1" not in out @@ -177,14 +211,13 @@ def test_disks_app_func_ln(started_cluster): [ "/usr/bin/clickhouse", "disks", - "link", - "data/default/test_table", - "data/default/z_tester", + "--query", + "link data/default/test_table data/default/z_tester", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list data/default/"] ) files = out.split("\n") @@ -209,15 +242,23 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "write", - "path3", + "--query", + "'write path3'", ] ), ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" in out @@ -229,13 +270,21 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path3", + "--query", + "remove path3", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" not in out @@ -247,7 +296,15 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -260,14 +317,21 @@ def test_disks_app_func_mv(started_cluster): "disks", "--disk", "test1", - "move", - "store", - "old_store", + "--query", + "move store old_store", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -290,8 +354,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "write", - "5.txt", + "--query", + "'write 5.txt'", ] ), ] @@ -304,8 +368,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "read", - "5.txt", + "--query", + "read 5.txt", ] ) @@ -319,7 +383,15 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test3", + "--query", + "list .", + ] ) files = out.split("\n") @@ -333,9 +405,8 @@ def test_remote_disk_list(started_cluster): "--save-logs", "--disk", "test3", - "list", - ".", - "--recursive", + "--query", + "list . 
--recursive", ] ) diff --git a/tests/integration/test_disks_app_interactive/__init__.py b/tests/integration/test_disks_app_interactive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml new file mode 100644 index 00000000000..bcbb107f0a2 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -0,0 +1,3 @@ + + /var/lib/clickhouse/ + \ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py new file mode 100644 index 00000000000..ca4ba5d9065 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/test.py @@ -0,0 +1,331 @@ +from helpers.cluster import ClickHouseCluster + +import pytest + +import pathlib + +import subprocess +import select +import io +from typing import List, Tuple, Dict, Union, Optional + +import os + + +class ClickHouseDisksException(Exception): + pass + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, + ) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +class DisksClient(object): + SEPARATOR = b"\a\a\a\a\n" + local_client: Optional["DisksClient"] = None # static variable + default_disk_root_directory: str = "/var/lib/clickhouse" + + def __init__(self, bin_path: str, config_path: str, working_path: str): + self.bin_path = bin_path + self.working_path = working_path + + self.proc = subprocess.Popen( + [bin_path, "disks", "--test-mode", "--config", config_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + self.stopped = False + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + def execute_query(self, query: str, timeout: float = 5.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Disks client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + error_line = self.proc.stderr.readline() + print(error_line) + raise ClickHouseDisksException(error_line.strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. 
Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def list_disks(self) -> List[Tuple[str, str]]: + output = self.execute_query("list-disks") + return list( + sorted( + map( + lambda x: (x.split(":")[0], ":".join(x.split(":")[1:])), + output.split("\n"), + ) + ) + ) + + def current_disk_with_path(self) -> Tuple[str, str]: + output = self.execute_query("current_disk_with_path") + disk_line = output.split("\n")[0] + path_line = output.split("\n")[1] + assert disk_line.startswith("Disk: ") + assert path_line.startswith("Path: ") + return disk_line[6:], path_line[6:] + + def ls( + self, path: str, recursive: bool = False, show_hidden: bool = False + ) -> Union[List[str], Dict[str, List[str]]]: + recursive_adding = "--recursive " if recursive else "" + show_hidden_adding = "--all " if show_hidden else "" + output = self.execute_query( + f"list {path} {recursive_adding} {show_hidden_adding}" + ) + if recursive: + answer: Dict[str, List[str]] = dict() + blocks = output.split("\n\n") + for block in blocks: + directory = block.split("\n")[0][:-1] + files = block.split("\n")[1:] + answer[directory] = files + return answer + else: + return output.split("\n") + + def switch_disk(self, disk: str, directory: Optional[str] = None): + directory_addition = f"--path {directory} " if directory is not None else "" + self.execute_query(f"switch-disk {disk} {directory_addition}") + + def cd(self, directory: str, disk: Optional[str] = None): + disk_addition = f"--disk {disk} " if disk is not None else "" + self.execute_query(f"cd {directory} {disk_addition}") + + def copy( + self, + path_from, + path_to, + disk_from: Optional[str] = None, + disk_to: Optional[str] = None, + recursive: bool = False, + ): + disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" + disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + recursive_tag = "--recursive" if recursive else "" + + self.execute_query( + f"copy {recursive_tag} {path_from} {path_to} {disk_from_option} {disk_to_option}" + ) + + def move(self, path_from: str, path_to: str): + self.execute_query(f"move {path_from} {path_to}") + + def rm(self, path: str, recursive: bool = False): + recursive_tag = "--recursive" if recursive else "" + self.execute_query(f"rm {recursive_tag} {path}") + + def mkdir(self, path: str, recursive: bool = False): + recursive_adding = "--recursive " if recursive else "" + self.execute_query(f"mkdir {path} {recursive_adding}") + + def ln(self, path_from: str, path_to: str): + self.execute_query(f"link {path_from} {path_to}") + + def read(self, path_from: str, path_to: Optional[str] = None): + path_to_adding = f"--path-to {path_to} " if path_to is not None else "" + output = self.execute_query(f"read {path_from} {path_to_adding}") + return output + + def write( + self, path_from: str, path_to: str + ): # Writing from stdin is difficult to test (do not know how to do this in python) + path_from_adding = f"--path-from {path_from}" + self.execute_query(f"write {path_from_adding} {path_to}") + + @staticmethod + def getLocalDisksClient(refresh: bool): + if (DisksClient.local_client is None) or refresh: + binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") + current_working_directory = str(pathlib.Path().resolve()) + config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" + if not os.path.exists(DisksClient.default_disk_root_directory): + os.mkdir(DisksClient.default_disk_root_directory) + + DisksClient.local_client = DisksClient( 
+ binary_file, config_file, current_working_directory + ) + return DisksClient.local_client + else: + return DisksClient.local_client + + +def test_disks_app_interactive_list_disks(): + client = DisksClient.getLocalDisksClient(True) + expected_disks_with_path = [ + ("default", "/"), + ("local", client.working_path), + ] + assert expected_disks_with_path == client.list_disks() + assert client.current_disk_with_path() == ("default", "/") + client.switch_disk("local") + assert client.current_disk_with_path() == ( + "local", + client.working_path, + ) + + +def test_disks_app_interactive_list_files_local(): + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("local") + excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) + listed_files = sorted(client.ls("test_disks_app_interactive/")) + assert excepted_listed_files == listed_files + + +def test_disks_app_interactive_list_directories_default(): + client = DisksClient.getLocalDisksClient(True) + traversed_dir = client.ls(".", recursive=True) + client.mkdir("dir1") + client.mkdir("dir2") + client.mkdir(".dir3") + client.cd("dir1") + client.mkdir("dir11") + client.mkdir(".dir12") + client.mkdir("dir13") + client.cd("../dir2") + client.mkdir("dir21") + client.mkdir("dir22") + client.mkdir(".dir23") + client.cd("../.dir3") + client.mkdir("dir31") + client.mkdir(".dir32") + client.cd("..") + traversed_dir = client.ls(".", recursive=True) + assert traversed_dir == { + ".": ["dir1", "dir2"], + "./dir1": ["dir11", "dir13"], + "./dir2": ["dir21", "dir22"], + "./dir1/dir11": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1", "dir2"], + "./dir1": [".dir12", "dir11", "dir13"], + "./dir2": [".dir23", "dir21", "dir22"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + "./dir2/.dir23": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + client.rm("dir2", recursive=True) + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1"], + "./dir1": [".dir12", "dir11", "dir13"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=False) + assert traversed_dir == { + ".": ["dir1"], + "./dir1": ["dir11", "dir13"], + "./dir1/dir11": [], + "./dir1/dir13": [], + } + client.rm("dir1", recursive=True) + client.rm(".dir3", recursive=True) + assert client.ls(".", recursive=True, show_hidden=False) == {".": []} + + +def test_disks_app_interactive_cp_and_read(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt") + assert initial_text == read_text + client.mkdir("dir1") + client.copy("a.txt", "/dir1/b.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt", path_to="dir1/b.txt") + assert "" == read_text + read_text = client.read("/dir1/b.txt") + assert read_text == initial_text + with open(f"{DisksClient.default_disk_root_directory}/dir1/b.txt", "r") as file: + read_text = file.read() + assert read_text == initial_text + 
os.remove("a.txt") + client.rm("a.txt") + client.rm("/dir1", recursive=True) + + +def test_disks_app_interactive_test_move_and_write(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + files = client.ls(".") + assert files == ["a.txt"] + client.move("a.txt", "b.txt") + files = client.ls(".") + assert files == ["b.txt"] + read_text = client.read("/b.txt") + assert read_text == initial_text + client.write("b.txt", "c.txt") + read_text = client.read("c.txt") + assert read_text == initial_text + os.remove("a.txt") diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 50d7be4d11e..7ecb2cda257 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -27,9 +27,6 @@ def make_instance(name, *args, **kwargs): ) -# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits -assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" - # _n1/_n2 contains cluster with different -- should fail # only n1 contains new_user n1 = make_instance( @@ -38,14 +35,6 @@ n1 = make_instance( user_configs=["configs/users.d/new_user.xml"], ) n2 = make_instance("n2", main_configs=["configs/remote_servers_n2.xml"]) -backward = make_instance( - "backward", - main_configs=["configs/remote_servers_backward.xml"], - image="clickhouse/clickhouse-server", - # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 - tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, - with_installed_binary=True, -) users = pytest.mark.parametrize( "user,password", @@ -427,28 +416,6 @@ def test_per_user_protocol_settings_secure_cluster(user, password): ) -@users -def test_user_secure_cluster_with_backward(user, password): - id_ = "with-backward-query-dist_secure-" + user - n1.query( - f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password - ) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - -@users -def test_user_secure_cluster_from_backward(user, password): - id_ = "from-backward-query-dist_secure-" + user - backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) - assert get_query_user_info(n1, id_) == [user, user] - assert get_query_user_info(backward, id_) == [user, user] - - assert n1.contains_in_log( - "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." 

-    )
-
-
 def test_secure_cluster_distributed_over_distributed_different_users():
     # This works because we will have initial_user='default'
     n1.query(
diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py
index 14c11bc8500..87b2c45bbc5 100644
--- a/tests/integration/test_force_deduplication/test.py
+++ b/tests/integration/test_force_deduplication/test.py
@@ -29,8 +29,6 @@ def get_counts():


 def test_basic(start_cluster):
-    old_src, old_a, old_b, old_c = 0, 0, 0, 0
-
     node.query(
         """
         CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple();
@@ -41,15 +39,6 @@ def test_basic(start_cluster):
         INSERT INTO test values(999);
         """
     )
-
-    src, a, b, c = get_counts()
-    assert src == old_src + 1
-    assert a == old_a + 2
-    assert b == old_b + 2
-    assert c == old_c + 2
-    old_src, old_a, old_b, old_c = src, a, b, c
-
-    # that issert fails on test_mv_b due to partitions by A
     with pytest.raises(QueryRuntimeException):
         node.query(
             """
@@ -57,51 +46,34 @@ def test_basic(start_cluster):
             INSERT INTO test SELECT number FROM numbers(10);
             """
         )
-    src, a, b, c = get_counts()
-    assert src == old_src + 10
-    assert a == old_a + 10
-    assert b == old_b
-    assert c == old_c + 10
-    old_src, old_a, old_b, old_c = src, a, b, c

-    # deduplication only for src table
+    old_src, old_a, old_b, old_c = get_counts()
+    # number of rows in test_mv_a and test_mv_c depends on order of inserts into views
+    assert old_src == 11
+    assert old_a in (1, 11)
+    assert old_b == 1
+    assert old_c in (1, 11)
+
     node.query("INSERT INTO test SELECT number FROM numbers(10)")
     src, a, b, c = get_counts()
-    assert src == old_src
-    assert a == old_a + 10
-    assert b == old_b + 10
-    assert c == old_c + 10
-    old_src, old_a, old_b, old_c = src, a, b, c
-
-    # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`.
-    node.query(
-        """
-        SET deduplicate_blocks_in_dependent_materialized_views = 1;
-        INSERT INTO test SELECT number FROM numbers(10);
-        """
-    )
-    src, a, b, c = get_counts()
-    assert src == old_src
-    assert a == old_a + 10
-    assert b == old_b + 10
-    assert c == old_c + 10
-    old_src, old_a, old_b, old_c = src, a, b, c
-
-    # deduplication for all the tables
-    node.query(
-        """
-        SET deduplicate_blocks_in_dependent_materialized_views = 1;
-        INSERT INTO test SELECT number FROM numbers(10);
-        """
-    )
-    src, a, b, c = get_counts()
+    # no changes because of deduplication in source table
     assert src == old_src
     assert a == old_a
     assert b == old_b
     assert c == old_c
-    old_src, old_a, old_b, old_c = src, a, b, c

-    # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated
+    node.query(
+        """
+        SET deduplicate_blocks_in_dependent_materialized_views = 1;
+        INSERT INTO test SELECT number FROM numbers(10);
+        """
+    )
+    src, a, b, c = get_counts()
+    assert src == 11
+    assert a == old_a + 10  # first insert could be successful with disabled dedup
+    assert b == 11
+    assert c == old_c + 10
+
     with pytest.raises(QueryRuntimeException):
         node.query(
             """
@@ -110,23 +82,16 @@ def test_basic(start_cluster):
             INSERT INTO test SELECT number FROM numbers(100,10);
             """
         )
-    src, a, b, c = get_counts()
-    assert src == old_src + 10
-    assert a == old_a + 10
-    assert b == old_b
-    assert c == old_c + 10
-    old_src, old_a, old_b, old_c = src, a, b, c

-    # deduplication for all tables, except test_mv_b. For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert
     node.query(
         """
         SET deduplicate_blocks_in_dependent_materialized_views = 1;
         INSERT INTO test SELECT number FROM numbers(100,10);
         """
     )
+
     src, a, b, c = get_counts()
-    assert src == old_src
-    assert a == old_a
-    assert b == old_b + 10
-    assert c == old_c
-    old_src, old_a, old_b, old_c = src, a, b, c
+    assert src == 21
+    assert a == old_a + 20
+    assert b == 21
+    assert c == old_c + 20
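The absolute counts asserted above all rest on block-level deduplication of the Replicated source table; a condensed, hedged illustration of that mechanism, reusing `node` and `get_counts()` as defined in this test:

```python
# An identical INSERT SELECT produces a block with the same checksum, so the
# Replicated table drops it and the materialized views never see it either.
node.query("INSERT INTO test SELECT number FROM numbers(10)")
before = get_counts()
node.query("INSERT INTO test SELECT number FROM numbers(10)")  # deduplicated
assert get_counts() == before  # neither src nor any MV target changed
```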
diff --git a/tests/integration/test_memory_limit_observer/test.py b/tests/integration/test_memory_limit_observer/test.py
index fe3acd9a0cf..0eda165b1d2 100644
--- a/tests/integration/test_memory_limit_observer/test.py
+++ b/tests/integration/test_memory_limit_observer/test.py
@@ -35,7 +35,7 @@ def get_latest_mem_limit():
             ).strip()
         )
         return mem_limit
-    except Exception as e:
+    except Exception:
         time.sleep(1)
     raise Exception("Cannot get memory limit")

@@ -51,3 +51,29 @@ def test_observe_memory_limit(started_cluster):
         if new_max_mem > original_max_mem:
             return
     raise Exception("the memory limit does not increase as expected")
+
+
+def test_memory_usage_doesnt_include_page_cache_size(started_cluster):
+    try:
+        # populate page cache with 4GB of data; it might be killed by OOM killer but it is fine
+        node1.exec_in_container(
+            ["dd", "if=/dev/zero", "of=outputfile", "bs=1M", "count=4K"]
+        )
+    except Exception:
+        pass
+
+    observer_refresh_period = int(
+        node1.query(
+            "select value from system.server_settings where name = 'cgroups_memory_usage_observer_wait_time'"
+        ).strip()
+    )
+    time.sleep(observer_refresh_period + 1)
+
+    max_mem_usage_from_cgroup = node1.query(
+        """
+        SELECT max(toUInt64(replaceRegexpAll(message, 'Read current memory usage (\\d+) bytes.*', '\\1'))) AS max_mem
+        FROM system.text_log
+        WHERE logger_name = 'CgroupsMemoryUsageObserver' AND message LIKE 'Read current memory usage%bytes%'
+        """
+    ).strip()
+    assert int(max_mem_usage_from_cgroup) < 2 * 2**30
diff --git a/tests/integration/test_startup_scripts/__init__.py b/tests/integration/test_startup_scripts/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
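The memory-observer test above recovers the observer's readings by grepping `system.text_log`. A more direct but hypothetical probe of the same cgroup v2 numbers would look like the sketch below; the paths and their availability inside the container are assumptions, not something this PR relies on:

```python
# Hypothetical helper: read the cgroup v2 memory limit and current usage directly.
def read_cgroup_memory(node):
    limit = node.exec_in_container(["cat", "/sys/fs/cgroup/memory.max"]).strip()
    usage = int(node.exec_in_container(["cat", "/sys/fs/cgroup/memory.current"]).strip())
    return (None if limit == "max" else int(limit)), usage
```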
diff --git a/tests/integration/test_startup_scripts/configs/config.d/query_log.xml b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml
new file mode 100644
index 00000000000..24d66fc674e
--- /dev/null
+++ b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <query_log>
+        <database>system</database>
+        <table>query_log</table>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
+    </query_log>
+</clickhouse>
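Why this config matters for the test that follows: query_log rows only become visible after a flush, so a test either waits out the 1000 ms `flush_interval_milliseconds` above or forces the flush, as other tests in this PR do. A hedged sketch with an assumed started `node` instance:

```python
# Force the periodic flush instead of sleeping past flush_interval_milliseconds.
node.query("SYSTEM FLUSH LOGS")
logged = int(
    node.query("SELECT count() FROM system.query_log WHERE event_date >= yesterday()")
)
assert logged > 0  # earlier queries from this session are now queryable
```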
diff --git a/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml
new file mode 100644
index 00000000000..e8a711a926a
--- /dev/null
+++ b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml
@@ -0,0 +1,17 @@
+<clickhouse>
+    <startup_scripts>
+        <scripts>
+            <query>CREATE ROLE OR REPLACE testrole</query>
+        </scripts>
+        <scripts>
+            <query>GRANT CREATE USER, ALTER USER, DROP USER, SHOW USERS, SHOW CREATE USER ON *.* TO 'testrole' WITH GRANT OPTION;</query>
+        </scripts>
+        <scripts>
+            <query>CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog</query>
+            <condition>SELECT 1;</condition>
+        </scripts>
+        <scripts>
+            <query>SELECT * FROM system.query_log LIMIT 1</query>
+        </scripts>
+    </startup_scripts>
+</clickhouse>
diff --git a/tests/integration/test_startup_scripts/configs/users.xml b/tests/integration/test_startup_scripts/configs/users.xml
new file mode 100644
index 00000000000..f9917b034b2
--- /dev/null
+++ b/tests/integration/test_startup_scripts/configs/users.xml
@@ -0,0 +1,41 @@
+ + + + + + + 1 + + + + + + + + + + ::/0 + + + default + + default + + + + + + + + 3600 + + 0 + 0 + 0 + 0 + 0 + + + +
diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py
new file mode 100644
index 00000000000..43a871a6fc5
--- /dev/null
+++ b/tests/integration/test_startup_scripts/test.py
@@ -0,0 +1,21 @@
+from helpers.cluster import ClickHouseCluster
+
+
+def test_startup_scripts():
+    cluster = ClickHouseCluster(__file__)
+
+    node = cluster.add_instance(
+        "node",
+        main_configs=[
+            "configs/config.d/query_log.xml",
+            "configs/config.d/startup_scripts.xml",
+        ],
+        with_zookeeper=False,
+    )
+
+    try:
+        cluster.start()
+        assert node.query("SHOW TABLES") == "TestTable\n"
+
+    finally:
+        cluster.shutdown()
diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index 20b004a7605..6fbe7634642 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -809,7 +809,7 @@ def test_read_subcolumn_time(cluster):
 def test_read_from_not_existing_container(cluster):
     node = cluster.instances["node"]
     query = (
-        f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont_not_exists', 'test_table.csv', "
+        f"select * from azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont-not-exists', 'test_table.csv', "
         f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')"
     )
     expected_err_msg = "container does not exist"
diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py
index b93e560d5b9..bf3c28c5429 100644
--- a/tests/integration/test_storage_s3_queue/test.py
+++ b/tests/integration/test_storage_s3_queue/test.py
@@ -1780,6 +1780,7 @@ def test_commit_on_limit(started_cluster):
         if "test_999999.csv" in get_processed_files():
             break
         time.sleep(1)
+    assert "test_999999.csv" in get_processed_files()

     assert 1 == int(
diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference
index 9c9281dc7e4..adf6abb7298 100644
--- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference
+++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference
@@ -1,7 +1,7 @@
 2
 3
-3
+2
 3

 1
diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql 
b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 51e6a513608..d3c4da86b41 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default +-- Implicit insert isn't deduplicated SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 8f7d19028b0..1fb219108da 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh new file mode 100755 index 00000000000..1ca953c80d9 --- /dev/null +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function elapsed_sec() +{ + local expr=$1 && shift + local start end + start=$(date +%s.%N) + while ! 
eval "$expr"; do + sleep 0.5 + done + end=$(date +%s.%N) + $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_01256; + drop table if exists buffer_01256; + + create table data_01256 as system.numbers Engine=Memory(); +" + +echo "min" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 2, 100, /* time */ + 4, 100, /* rows */ + 1, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" +[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "max" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 2, /* time */ + 0, 100, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "direct" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 100, /* time */ + 0, 9, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 10; + select count() from data_01256; +" + +echo "drop" +$CLICKHOUSE_CLIENT -nm -q " + insert into buffer_01256 select * from system.numbers limit 10; + drop table if exists buffer_01256; + select count() from data_01256; +" + +$CLICKHOUSE_CLIENT -q "drop table data_01256" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql deleted file mode 100644 index 66f93371c29..00000000000 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ /dev/null @@ -1,50 +0,0 @@ --- Tags: no-fasttest - -SET function_sleep_max_microseconds_per_block = 4000000; - -drop table if exists data_01256; -drop table if exists buffer_01256; - -create table data_01256 as system.numbers Engine=Memory(); - -select 'min'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 5, 100, /* time */ - 4, 100, /* rows */ - 1, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) -select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6; -select count() from data_01256; -drop table buffer_01256; - -select 'max'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 2, /* time */ - 0, 100, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; -select count() from data_01256; -drop table buffer_01256; - -select 'direct'; 
diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql
deleted file mode 100644
index 66f93371c29..00000000000
--- a/tests/queries/0_stateless/01246_buffer_flush.sql
+++ /dev/null
@@ -1,50 +0,0 @@
--- Tags: no-fasttest
-
-SET function_sleep_max_microseconds_per_block = 4000000;
-
-drop table if exists data_01256;
-drop table if exists buffer_01256;
-
-create table data_01256 as system.numbers Engine=Memory();
-
-select 'min';
-create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1,
-    5, 100, /* time */
-    4, 100, /* rows */
-    1, 1e6  /* bytes */
-);
-insert into buffer_01256 select * from system.numbers limit 5;
-select count() from data_01256;
--- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec)
-select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6;
-select count() from data_01256;
-drop table buffer_01256;
-
-select 'max';
-create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1,
-    100, 2, /* time */
-    0, 100, /* rows */
-    0, 1e6  /* bytes */
-);
-insert into buffer_01256 select * from system.numbers limit 5;
-select count() from data_01256;
--- sleep 2 (min time) + 1 (round up) + bias (1) = 4
-select sleepEachRow(2) from numbers(2) FORMAT Null;
-select count() from data_01256;
-drop table buffer_01256;
-
-select 'direct';
-create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1,
-    100, 100, /* time */
-    0, 9,     /* rows */
-    0, 1e6    /* bytes */
-);
-insert into buffer_01256 select * from system.numbers limit 10;
-select count() from data_01256;
-
-select 'drop';
-insert into buffer_01256 select * from system.numbers limit 10;
-drop table if exists buffer_01256;
-select count() from data_01256;
-
-drop table data_01256;
diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference
index dadf2f35e6e..a9801e3b910 100644
--- a/tests/queries/0_stateless/01275_parallel_mv.reference
+++ b/tests/queries/0_stateless/01275_parallel_mv.reference
@@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where
    Settings['parallel_view_processing'] = '1' and
    Settings['optimize_trivial_insert_select'] = '0' and
    Settings['max_insert_threads'] = '16';
-18
+5
select count() from testX;
60
select count() from testXA;
@@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where
    Settings['parallel_view_processing'] = '1' and
    Settings['optimize_trivial_insert_select'] = '1' and
    Settings['max_insert_threads'] = '16';
-18
+5
select count() from testX;
80
select count() from testXA;
diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference
index c3688b553c4..b48979a492e 100644
--- a/tests/queries/0_stateless/01563_distributed_query_finish.reference
+++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference
@@ -1,2 +1 @@
-1,0
NETWORK_ERROR=0
diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh
index 0019c714e40..e3c5928f108 100755
--- a/tests/queries/0_stateless/01563_distributed_query_finish.sh
+++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh
@@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards
select * from dist_01247 format Null;
EOL

-network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
+# NOTE: it is possible to get NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender sending to the cloud
+for ((i = 0; i < 100; ++i)); do
+    network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")

-opts=(
-    "--max_distributed_connections=1"
-    "--optimize_skip_unused_shards=1"
-    "--optimize_distributed_group_by_sharding_key=1"
-    "--prefer_localhost_replica=0"
-)
-$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm <
filter is pushed down before CreatingSets
CreatingSets
Filter
+Filter
1
3
> one condition of filter is pushed down before LEFT JOIN
diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql
index 1301135b4cb..2193fc7a8f4 100644
--- a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql
+++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql
@@ -1,3 +1,5 @@
+set query_plan_merge_filters=1;
+
set allow_experimental_analyzer=1;
select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%';
diff --git
a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index 6287156daaf..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,7 +16,6 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS -SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference index e69de29bb2d..e22493782f0 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.reference +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 14162e0d2e2..590e60eb42e 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,3 +1,4 @@ --- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). SET allow_suspicious_low_cardinality_types=1; -SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError ILLEGAL_COLUMN } +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14 00:00:01'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize(NULL)); diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference index e6a2b2b6aaf..ab832478da0 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00000001 39 Hello 00000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000001 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 @@ -125,7 +119,6 @@ String 
VectorVector 9 Hel 0010010000110010 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 -7 Hello 0000000010010000110010101101100011011000 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 7 Hel 000000001001000011001010 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 -8 Hello 01001000011001010110110001101100 8 Hel 0100100001100101 8 Hel 0100100001100101 8 Hel 0100100001100101 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 
00000000010010000110010101101100011011000110111100000000000000000000000000000000 -8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 0010010000110010101101100011011000110111 7 Hello 0000000010010000110010101101100011011000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00100100 39 Hello 00000000 40 Hello -41 Hello -42 Hello 7 Hello 0000000010010000110010101101100011011000 8 Hello 01001000011001010110110001101100 9 Hello 00100100001100101011011000110110 --1 Hel 0 Hel 010010000110010101101100 1 Hel 001001000011001010110110 7 Hel 000000001001000011001010 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 00100100 23 Hel 00000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 000000001001000011001010 8 Hel 0100100001100101 9 Hel 0010010000110010 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 00100100001100101011011000110110001101111000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110001101100011011110000000 39 Hello\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110001101100011011110 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000110110001101111 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100011011000110111 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110001101100011011 7 Hello\0\0\0\0\0 00000000100100001100101011011000110110001101111000000000000000000000000000000000 8 Hello\0\0\0\0\0 00000000010010000110010101101100011011000110111100000000000000000000000000000000 9 Hello\0\0\0\0\0 00000000001001000011001010110110001101100011011110000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 00100100001100101011011000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 @@ -332,8 +295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000001001000011001010110110000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000001001000011001010110110000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000100100001100101011011000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000010010000110010101101100000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000001001000011001010110110000000000000000 7 Hel\0\0\0\0\0\0\0 00000000100100001100101011011000000000000000000000000000000000000000000000000000 8 
Hel\0\0\0\0\0\0\0 00000000010010000110010101101100000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 00000000001001000011001010110110000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql index 0ee04e408ba..40fccbc89e6 100644 --- a/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql +++ b/tests/queries/0_stateless/02016_bit_shift_right_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftRight('Hello', 37)); SELECT 38,'Hello',bin(bitShiftRight('Hello', 38)); SELECT 39,'Hello',bin(bitShiftRight('Hello', 39)); SELECT 40,'Hello',bin(bitShiftRight('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftRight('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftRight('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftRight(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 1 SELECT 78,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftRight(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_right_string_integer; CREATE TABLE test_bit_shift_right_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_right_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftRight(str, id)) as string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftRight('Hello', 42)); +SELECT bin(bitShiftRight('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, id)) as fixed_string_res FROM test_bit_shift_right_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT 
bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftRight(str, 7)) as string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftRight(str, 8)) as string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftRight(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_right_string_integer; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftRight('Hello', id)) as string_res FROM test_bit_shift_right_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftRight('Hel', id)) as string_res FROM test_bit_shift_right_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftRight('Hello', 42)); --A blank line +SELECT bin(bitShiftRight('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftRight(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftRight(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_right_string_integer; diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference index ff5a09c0d48..a20c44bbb9a 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.reference @@ -41,8 +41,6 @@ String ConstConst 38 Hello 00010010000110010101101100011011000110111100000000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello FixedString ConstConst 1 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 @@ -92,10 +90,8 @@ FixedString ConstConst 78 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 79 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 80 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -81 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 String VectorVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -112,8 +108,6 @@ String VectorVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 
0000000010010000110010101101100000000000 @@ -125,7 +119,6 @@ String VectorVector 9 Hel 0000000010010000110010101101100000000000 FixedString VectorVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -142,8 +135,6 @@ FixedString VectorVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 @@ -171,9 +162,6 @@ String VectorConst 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 -7 Hello 001001000011001010110110001101100011011110000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 7 Hel 00100100001100101011011000000000 @@ -193,9 +181,6 @@ String VectorConst 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 -8 Hello 010010000110010101101100011011000110111100000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 8 Hel 01001000011001010110110000000000 @@ -217,9 +202,6 @@ FixedString VectorConst 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 -7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -239,15 +221,11 @@ FixedString VectorConst 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 
01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 -8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 String ConstVector --1 Hello 0 Hello 0100100001100101011011000110110001101111 1 Hello 000000001001000011001010110110001101100011011110 7 Hello 001001000011001010110110001101100011011110000000 @@ -264,12 +242,9 @@ String ConstVector 33 Hello 00000000100100001100101011011000110110001101111000000000000000000000000000000000 39 Hello 00100100001100101011011000110110001101111000000000000000000000000000000000000000 40 Hello -41 Hello -42 Hello 7 Hello 001001000011001010110110001101100011011110000000 8 Hello 010010000110010101101100011011000110111100000000 9 Hello 00000000100100001100101011011000110110001101111000000000 --1 Hel 0 Hel 010010000110010101101100 1 Hel 00000000100100001100101011011000 7 Hel 00100100001100101011011000000000 @@ -280,20 +255,11 @@ String ConstVector 17 Hel 000000001001000011001010110110000000000000000000 23 Hel 001001000011001010110110000000000000000000000000 24 Hel -25 Hel -31 Hel -32 Hel -33 Hel -39 Hel -40 Hel -41 Hel -42 Hel 7 Hel 00100100001100101011011000000000 8 Hel 01001000011001010110110000000000 9 Hel 0000000010010000110010101101100000000000 FixedString ConstVector --1 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hello\0\0\0\0\0 01001000011001010110110001101100011011110000000000000000000000000000000000000000 1 Hello\0\0\0\0\0 10010000110010101101100011011000110111100000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 @@ -310,12 +276,9 @@ FixedString ConstVector 33 Hello\0\0\0\0\0 11011110000000000000000000000000000000000000000000000000000000000000000000000000 39 Hello\0\0\0\0\0 10000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hello\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hello\0\0\0\0\0 00110010101101100011011000110111100000000000000000000000000000000000000000000000 8 Hello\0\0\0\0\0 01100101011011000110110001101111000000000000000000000000000000000000000000000000 9 Hello\0\0\0\0\0 11001010110110001101100011011110000000000000000000000000000000000000000000000000 --1 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0 Hel\0\0\0\0\0\0\0 01001000011001010110110000000000000000000000000000000000000000000000000000000000 1 Hel\0\0\0\0\0\0\0 10010000110010101101100000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 @@ -332,8 
+295,6 @@ FixedString ConstVector 33 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 39 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 40 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -41 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 -42 Hel\0\0\0\0\0\0\0 00000000000000000000000000000000000000000000000000000000000000000000000000000000 7 Hel\0\0\0\0\0\0\0 00110010101101100000000000000000000000000000000000000000000000000000000000000000 8 Hel\0\0\0\0\0\0\0 01100101011011000000000000000000000000000000000000000000000000000000000000000000 9 Hel\0\0\0\0\0\0\0 11001010110110000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql index 5c7a9901dae..a8e66eda281 100644 --- a/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql +++ b/tests/queries/0_stateless/02017_bit_shift_left_for_string_integer.sql @@ -41,8 +41,6 @@ SELECT 37,'Hello',bin(bitShiftLeft('Hello', 37)); SELECT 38,'Hello',bin(bitShiftLeft('Hello', 38)); SELECT 39,'Hello',bin(bitShiftLeft('Hello', 39)); SELECT 40,'Hello',bin(bitShiftLeft('Hello', 40)); -SELECT 41,'Hello',bin(bitShiftLeft('Hello', 41)); -SELECT 42,'Hello',bin(bitShiftLeft('Hello', 42)); SELECT 'FixedString ConstConst'; SELECT bin(toFixedString('Hello', 10)) == bin(bitShiftLeft(toFixedString('Hello', 10), 0)); @@ -93,40 +91,39 @@ SELECT 77,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10 SELECT 78,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 78)); SELECT 79,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 79)); SELECT 80,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 80)); -SELECT 81,toFixedString('Hello', 10), bin(bitShiftLeft(toFixedString('Hello', 10), 81)); DROP TABLE IF EXISTS test_bit_shift_left_string_integer; CREATE TABLE test_bit_shift_left_string_integer (str String, fixedStr FixedString(10), id Int64) engine=Log; -INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',-1)('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hello','Hello',41),('Hello','Hello',42),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); +INSERT INTO test_bit_shift_left_string_integer VALUES('Hello','Hello',0),('Hello','Hello',1),('Hello','Hello',7),('Hello','Hello',8),('Hello','Hello',9),('Hello','Hello',15),('Hello','Hello',16),('Hello','Hello',17),('Hello','Hello',23),('Hello','Hello',24),('Hello','Hello',25),('Hello','Hello',31),('Hello','Hello',32),('Hello','Hello',33),('Hello','Hello',39),('Hello','Hello',40),('Hel','Hel',7),('Hel','Hel',8),('Hel','Hel',9); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorVector'; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,str as arg,bin(bitShiftLeft(str, id)) as string_res FROM 
test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); +SELECT bin(bitShiftLeft('Hello', 40)); SELECT 'FixedString VectorVector'; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, id)) as fixed_string_res FROM test_bit_shift_left_string_integer WHERE (str='Hello' AND (id=23 OR id=24 OR id=25)) OR (str='Hel' AND (id=7 OR id=8 OR id=9)); -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String VectorConst'; SELECT 7 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 7)) as string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,str as arg,bin(bitShiftLeft(str, 8)) as string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString VectorConst'; SELECT 7 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 7)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT 8 as shift_right_bit,fixedStr as arg,bin(bitShiftLeft(fixedStr, 8)) as fixed_string_res FROM test_bit_shift_left_string_integer; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'String ConstVector'; SELECT id as shift_right_bit,'Hello' as arg,bin(bitShiftLeft('Hello', id)) as string_res FROM test_bit_shift_left_string_integer; -SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer; +SELECT id as shift_right_bit,'Hel' as arg,bin(bitShiftLeft('Hel', id)) as string_res FROM test_bit_shift_left_string_integer WHERE id <= 8 * 3; -SELECT bin(bitShiftLeft('Hello', 42)); --A blank line +SELECT bin(bitShiftLeft('Hello', 40)); --A blank line SELECT 'FixedString ConstVector'; SELECT id as shift_right_bit,toFixedString('Hello', 10) as arg,bin(bitShiftLeft(toFixedString('Hello', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; SELECT id as shift_right_bit,toFixedString('Hel', 10) as arg,bin(bitShiftLeft(toFixedString('Hel', 10), id)) as fixed_string_res FROM test_bit_shift_left_string_integer; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index 2d9f236ada9..e0cc8f0ce63 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data -18 18 18 18 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have 
deduplicated data
+deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsistent
+18 18 9 18
+deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsistent
+18 9 9 9
+deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsistent
+18 18 9 18
+deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consistent
18 18 18 18
diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql
index 465c8d6136c..fdd75b91b1f 100644
--- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql
+++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql
@@ -1,6 +1,6 @@
-- Tags: long

-select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0';
+select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsistent';

drop table if exists test sync;
drop table if exists test_mv_a sync;
@@ -35,7 +35,7 @@ select
    (select sum(c) from test_mv_c where test='case1');

-select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data';
+select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsistent';

set deduplicate_blocks_in_dependent_materialized_views=1;

@@ -53,7 +53,7 @@ select
    (select sum(c) from test_mv_c where test='case2');

-select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0';
+select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsistent';

set deduplicate_blocks_in_dependent_materialized_views=0;

@@ -70,7 +70,7 @@ select
    (select sum(c) from test_mv_b where test='case3'),
    (select sum(c) from test_mv_c where test='case3');

-select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data';
+select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consistent';

set deduplicate_blocks_in_dependent_materialized_views=1;

diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql
index ba50902ebea..d2d19b76a1f 100644
--- a/tests/queries/0_stateless/02125_query_views_log.sql
+++ b/tests/queries/0_stateless/02125_query_views_log.sql
@@ -8,7 +8,7 @@ create table dst (key Int) engine=Null();
create materialized view mv1 to dst as select * from src;
create materialized view mv2 to dst as select * from src;

-insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0;
+insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1;

system flush logs;
-- { echo }
diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference
index 6276cd6d867..360c1aa9899 100644
---
a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -1,7 +1,4 @@ -- binary functions 4 7 -1 -1 -1 7 3 1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql index 824022b564c..687f3afb5ee 100644 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -1,8 +1,5 @@ set dialect='kusto'; print ' -- binary functions'; print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; print binary_xor(2, 5), bitset_count_ones(42); print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4a4e898c5bd..77ef213b36d 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,12 +332,13 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 70bcd7f255b..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,16 +29,20 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform × 2 + (Filter) + FilterTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -64,10 +68,14 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform + (Filter) + FilterTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql 
b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 6b2961f0555..91e8624057c 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } DROP TABLE t0; DROP TABLE t1; diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 2b9e5296a05..20b02bcba32 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,14 +14,11 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test - clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy -r $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." } - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test - # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk" --query "remove -r $CLICKHOUSE_DATABASE/test" } function run_test_copy_from_s3_to_s3(){ @@ -29,13 +26,12 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --log-level test --query "copy -r --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." 
} - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove -r $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 07deb7c2565..335b55f05c8 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,14 +10,13 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 -2023-09-01 12:00:00 42 +2022-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 +2022-09-01 12:00:00 42 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index a2378fd8f67..f206f0d7775 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,9 +54,8 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded - Now it is fixed. 
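For context on the two settings at play in this test: with deduplicate_blocks_in_dependent_materialized_views = 1 the deduplication scope of an INSERT extends from the landing table into every dependent materialized view, and insert_deduplication_token gives a retried batch a stable identity instead of relying on the block hash. A minimal sketch of a safe retry, assuming a Replicated*MergeTree landing table with one materialized view over it (the names and values below are illustrative, not the test's):

    set deduplicate_blocks_in_dependent_materialized_views = 1;
    insert into landing settings insert_deduplication_token = 'batch-1' values ('2022-09-01 12:23:34', 42);
    -- an identical retry with the same token is skipped in the landing table
    -- and in every dependent view, so no row is doubled
    insert into landing settings insert_deduplication_token = 'batch-1' values ('2022-09-01 12:23:34', 42);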
*/ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -98,7 +97,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables + - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference index 8a6e77d4f6d..53e5a556821 100644 --- a/tests/queries/0_stateless/02941_variant_type_1.reference +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -91,42 +91,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -145,21 +145,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -256,42 +256,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -310,23 +310,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -421,42 +421,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -475,23 +475,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree compact @@ -587,42 +587,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -641,21 +641,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -751,42 +751,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -805,21 +805,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -916,42 +916,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 
-\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -970,23 +970,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -1080,42 +1080,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1134,23 +1134,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -1245,42 +1245,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1299,23 +1299,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -1409,42 +1409,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1463,23 +1463,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree wide @@ -1575,42 +1575,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1629,21 +1629,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1739,42 +1739,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1793,21 +1793,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1904,42 +1904,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1958,23 +1958,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 
+0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -2068,42 +2068,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -2122,23 +2122,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -2233,42 +2233,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2287,23 +2287,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -2397,42 +2397,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2451,22 +2451,22 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 20a5176cb5e..1d9126aa230 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -6,9 +6,6 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test4 insert test4 select @@ -17,18 +14,12 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test4 insert test4 select @@ -37,15 +28,9 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index f43cd2bb0d6..8453bce98dc 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -33,13 +33,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select 
v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference index 1ccdb3acdff..d28aa7a594b 100644 --- a/tests/queries/0_stateless/02941_variant_type_3.reference +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -6,9 +6,6 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test5 insert test5 select @@ -17,18 +14,12 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test5 insert test5 select @@ -37,15 +28,9 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index f4b2b304f56..990eb25b5be 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -35,13 +35,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference index e13d5820343..d1630b04347 100644 --- a/tests/queries/0_stateless/02941_variant_type_4.reference +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -6,9 +6,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test6 insert @@ -18,9 +15,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -28,9 +22,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test6 insert @@ -40,9 +31,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -50,7 +38,4 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f9a16847864..b8f619694b0 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ 
b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -29,13 +29,10 @@ function test6_select() select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); - select v.\`Array(UInt64)\`.size0 from test format Null; - select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + select v.\`Array(UInt64)\`.size0 from test format Null;" echo "-----------------------------------------------------------------------------------------------------------" } diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference new file mode 100644 index 00000000000..df105254618 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.reference @@ -0,0 +1,2 @@ +105 +105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql b/tests/queries/0_stateless/02967_prewhere_no_columns.sql new file mode 100644 index 00000000000..efcc952caa2 --- /dev/null +++ b/tests/queries/0_stateless/02967_prewhere_no_columns.sql @@ -0,0 +1,51 @@ +CREATE TABLE t_02967 +( + `key` Date, + `value` UInt16 +) +ENGINE = MergeTree +ORDER BY key +SETTINGS + index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 + , index_granularity = 100 + , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 +-- +-- , min_bytes_for_wide_part = 2 +AS SELECT + number, + repeat(toString(number), 5) +FROM numbers(105.); + + + +-- Check with a newly inserted data part. Its in-memory structures are filled at insert time.
+SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + + + +-- Reload the part from disk to check that in-memory structures were properly serialized and deserialized +DETACH TABLE t_02967; +ATTACH TABLE t_02967; + + +SELECT + count(ignore(*)) +FROM t_02967 +PREWHERE CAST(ignore() + 1 as UInt8) +GROUP BY + ignore(65535, *), + ignore(255, 256, *) +SETTINGS + --send_logs_level='test', + max_threads=1; + +DROP TABLE t_02967; diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference index 531163e1d84..3135f2d01e1 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 12d08159012..d543f7195a9 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 1e191b719a5..a2dd196083e 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt
+/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index b079e67a000..eec05c81344 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,14 +55,14 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference index 72749c905a3..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.reference +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.reference @@ -1 +1 @@ -1 1 1 +1 diff --git a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh index 263a4535c0e..84183606d48 100755 --- a/tests/queries/0_stateless/02982_aggregation_states_destruction.sh +++ b/tests/queries/0_stateless/02982_aggregation_states_destruction.sh @@ -11,4 +11,4 @@ $CLICKHOUSE_CLIENT --query_id $query_id --log_query_threads 1 --query="select nu $CLICKHOUSE_CLIENT -q "system flush logs;" -$CLICKHOUSE_CLIENT -q "select count() > 0, (countIf(thread_name = 'AggregDestruct') as aggs) > 0, aggs > 1 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase();" +$CLICKHOUSE_CLIENT -q "select count() > 0 from system.query_thread_log where query_id = '$query_id' and current_database = currentDatabase() and thread_name = 'AggregDestruct';" diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ 
b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python deleted file mode 100644 index dd1058518c9..00000000000 --- a/tests/queries/0_stateless/03008_deduplication.python +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import argparse -import string - - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - - -def __format(template, **params): - field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] - kv_args = {} - for field in field_names: - if field in params: - kv_args[field] = params[field] - else: - kv_args[field] = "" - - return template.format(**kv_args) - - -def instance_create_statement( - table_name, - table_columns, - table_keys, - table_engine, - with_deduplication, - no_merges=True, -): - template = """ - CREATE TABLE {table_name} - {table_columns} - ENGINE = {table_engine} - ORDER BY {table_keys} - {table_settings}; - {table_no_merges} - """ - - params = dict() - params["table_name"] = table_name - params["table_columns"] = table_columns - params["table_keys"] = table_keys - params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = ( - "MergeTree()" - if table_engine == "MergeTree" - else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" - ) - - deduplication_window_setting_name = ( - "non_replicated_deduplication_window" - if table_engine == "MergeTree" - else "replicated_deduplication_window" - ) - deduplication_window_setting_value = 1000 if with_deduplication else 0 - - settings = list() - settings += [ - f"{deduplication_window_setting_name}={deduplication_window_setting_value}" - ] - params["table_settings"] = "SETTINGS " + ",".join(settings) - - return __format(template, **params) - - -def instance_insert_statement( - table_name, count, insert_method, insert_unique_blocks, use_insert_token -): - insert_settings = ( - "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" - ) - - if insert_method == "InsertSelect": - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", - insert_settings=insert_settings, - ) - - else: - template = """ - INSERT INTO {table_name} - {insert_settings} VALUES {insert_values}; - """ - - values = [] - for i in range(count): - values += ( - [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] - ) - insert_values = ", ".join(values) - - return __format( - template, - table_name=table_name, - insert_settings=insert_settings, - insert_values=insert_values, - ) - - -def get_drop_tables_statements(tables): - return "".join( - [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] - ) - - -def get_logs_statement(args): - if args.get_logs: - return "SET send_logs_level='test';" - return "" - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -class 
ArgsFactory: - def __init__(self, parser): - self.__parser = parser - - def add_opt_engine(self): - self.__parser.add_argument( - "--table-engine", - choices=["ReplicatedMergeTree", "MergeTree"], - default="MergeTree", - ) - - def add_opt_user_token(self): - self.__parser.add_argument( - "--use-insert-token", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_single_thread(self): - self.__parser.add_argument( - "--single-thread", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_dedup_src(self): - self.__parser.add_argument( - "--deduplicate-src-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_dedup_dst(self): - self.__parser.add_argument( - "--deduplicate-dst-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_get_logs(self): - self.__parser.add_argument( - "--get-logs", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_uniq_blocks(self): - self.__parser.add_argument( - "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_insert_method(self): - self.__parser.add_argument( - "--insert-method", - choices=["InsertSelect", "InsertValues"], - default="InsertSelect", - ) - - def add_all(self): - self.add_opt_engine() - self.add_opt_user_token() - self.add_opt_single_thread() - self.add_opt_dedup_src() - self.add_opt_dedup_dst() - self.add_opt_get_logs() - self.add_opt_insert_method() - self.add_opt_uniq_blocks() - - -def test_insert_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_statement = instance_create_statement( - table_name="table_when_b_even", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - create_mv_statement = """ - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - """ - - drop_tables_statements = get_drop_tables_statements( - ["table_a_b", "table_when_b_even", "mv_b_even"] - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 10, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - print_details_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - - SELECT 'table_when_b_even'; - SELECT 'count', count() FROM table_when_b_even; - {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 5 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = """ - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 10 ) - FROM table_when_b_even; - """ - 
assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_a_b_statement} - - {create_table_when_b_even_statement} - - {create_mv_statement} - - -- first insert - {insert_statement} - - {print_details_statements} - - {assert_first_insert_statements} - - -- second insert, it is retry - {insert_statement} - - {print_details_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_mv_generates_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = [ - "table_for_join_with", - "table_a_b", - "table_when_b_even_and_joined", - "mv_b_even", - ] - drop_tables_statements = get_drop_tables_statements(tables) - - details_print_for_table_for_join_with = "" - if args.get_logs: - details_print_for_table_for_join_with = """ - SELECT 'table_for_join_with'; - SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - """ - - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a_src String, b UInt64)", - table_keys="(a_src, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_and_joined_statement = instance_create_statement( - table_name="table_when_b_even_and_joined", - table_columns="(a_src String, a_join String, b UInt64)", - table_keys="(a_src, a_join, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 5, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - - SELECT 'table_when_b_even_and_joined'; - SELECT 'count', count() FROM table_when_b_even_and_joined; - {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 5 ) - FROM table_a_b; - - SELECT throwIf( count() != 9 ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) - FROM table_when_b_even_and_joined; - """ - else: - if args.use_insert_token: - 
assert_first_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - CREATE TABLE table_for_join_with - (a_join String, b UInt64) - ENGINE = MergeTree() - ORDER BY (a_join, b); - INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(1); - {details_print_for_table_for_join_with} - - {create_table_a_b_statement} - SYSTEM STOP MERGES table_a_b; - - {create_table_when_b_even_and_joined_statement} - SYSTEM STOP MERGES table_when_b_even_and_joined; - - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even_and_joined - AS - SELECT a_src, a_join, table_for_join_with.b as b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -- first insert - {insert_statement} - - {details_print_statements} - - -- first assertion - {assert_first_insert_statements} - - -- second insert - {insert_statement} - - {details_print_statements} - - -- second assertion - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_several_mv_into_one_table(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] - drop_tables_statements = get_drop_tables_statements(tables) - - create_table_src_statement = instance_create_statement( - table_name="table_src", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_dst_statement = instance_create_statement( - table_name="table_dst", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_src", - 8, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_src count', count() FROM table_src; - - SELECT 'table_dst count', count() FROM table_dst; - {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} - """ - - if 
args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 8 ) - FROM table_src; - - SELECT throwIf( count() != 6 ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) - FROM table_dst; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_src_statement} - - {create_table_dst_statement} - - CREATE MATERIALIZED VIEW mv_b_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 2 = 0; - - CREATE MATERIALIZED VIEW mv_b_even_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 4 = 0; - - -- first insert - {insert_statement} - - {details_print_statements} - - {assert_first_insert_statements} - - -- second insert, retry - {insert_statement} - - {details_print_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def parse_args(): - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(dest="test") - test_insert_several_blocks( - subparsers.add_parser("insert_several_blocks_into_table") - ) - test_mv_generates_several_blocks( - subparsers.add_parser("mv_generates_several_blocks") - ) - test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) - args = parser.parse_args() - if args.test is None: - parser.print_help() - return args - - -def main(): - args = parse_args() - if args.test is not None: - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference deleted file mode 100644 index 4893274c1cd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference +++ /dev/null @@ -1,41 +0,0 @@ -Different materialized view insert into one underlayed table equal data. 
-first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -second attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Different insert operations generate the same data after transformation in underlied table of materialized view. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -second attempt -from dst 1 A all_1_1_0 -from dst 2 A all_2_2_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Indentical blocks in insertion with `insert_deduplication_token` -first attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -second attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -third attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -Indentical blocks in insertion -from dst 0 A all_1_1_0 -Indentical blocks after materialised view`s transformation -first attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 -second attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql deleted file mode 100644 index 7927a6b1edf..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql +++ /dev/null @@ -1,331 +0,0 @@ --- ######### -select 'Different materialized view insert into one underlayed table equal data.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; -DROP TABLE IF EXISTS mv_first; -DROP TABLE IF EXISTS mv_second; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE TABLE mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_first -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -CREATE MATERIALIZED VIEW mv_second -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_second; -DROP TABLE mv_first; -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (2, 'A'); - -SELECT - 'from dst', 
- *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion with `insert_deduplication_token`'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -select 'first attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'third attempt'; - -INSERT INTO dst SELECT - 1 AS key, - 'b' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks after materialised view`s transformation'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference deleted file mode 100644 index c82a6eaa213..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ /dev/null @@ -1,35 +0,0 @@ -no user deduplication token -partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: -1 A -1 D -2 B -2 C -mv_table is not deduplicated because the inserted blocks was different: -1 A -1 A -1 D -2 B -2 B -2 C -with user deduplication token -partitioned_table is not 
deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -mv_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -with incorrect ussage of user deduplication token -partitioned_table is deduplicated because equal tokens: -1 A -2 B -mv_table is deduplicated because equal tokens: -1 A -2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql deleted file mode 100644 index 2eb931f7f73..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ /dev/null @@ -1,83 +0,0 @@ -DROP TABLE IF EXISTS partitioned_table; -DROP TABLE IF EXISTS mv_table; - - -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is not deduplicated because different tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because different tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with incorrect ussage of user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT 
INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated because equal tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is deduplicated because equal tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference deleted file mode 100644 index bf900aa84d2..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: 
insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh
deleted file mode 100755
index 49eb52b47fd..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
-
-ENGINE="MergeTree"
-
-RUN_ONLY=""
-#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True"
-
-i=0
-for insert_method in "InsertSelect" "InsertValues"; do
-    for use_insert_token in "True" "False"; do
-        for single_thread in "True" "False"; do
-            for deduplicate_src_table in "True" "False"; do
-                for deduplicate_dst_table in "True" "False"; do
-                    for insert_unique_blocks in "True" "False"; do
-
-                        THIS_RUN="Test case $i:"
-                        THIS_RUN+=" insert_method=$insert_method"
-                        THIS_RUN+=" engine=$ENGINE"
-                        THIS_RUN+=" use_insert_token=$use_insert_token"
-                        THIS_RUN+=" single_thread=$single_thread"
-                        THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table"
-                        THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table"
-                        THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks"
-
-                        i=$((i+1))
-
-                        echo
-                        if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then
-                            echo "skip $THIS_RUN"
-                            continue
-                        fi
-                        echo "$THIS_RUN"
-
-                        $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq "
-                            $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \
-                                --insert-method $insert_method \
-                                --table-engine $ENGINE \
-                                --use-insert-token $use_insert_token \
-                                --single-thread $single_thread \
-                                --deduplicate-src-table $deduplicate_src_table \
-                                --deduplicate-dst-table $deduplicate_dst_table \
-                                --insert-unique-blocks $insert_unique_blocks \
-                                --get-logs false \
-                            )
-                        " && echo OK || echo FAIL
-                    done
-                done
-            done
-        done
-    done
-done
-
-echo
-echo "All cases executed"
diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference
deleted file mode 100644
index c815324b455..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference
+++ /dev/null
@@ -1,962 +0,0 @@
- -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK -
-Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 
-0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even 
-count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh
deleted file mode 100755
index 53af06d4a6f..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
-
-ENGINE="ReplicatedMergeTree"
-
-RUN_ONLY=""
-#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True"
-
-i=0
-for insert_method in "InsertSelect" "InsertValues"; do
-    for use_insert_token in "True" "False"; do
-        for single_thread in "True" "False"; do
-            for deduplicate_src_table in "True" "False"; do
-                for deduplicate_dst_table in "True" "False"; do
-                    for insert_unique_blocks in "True" "False"; do
-
-                        THIS_RUN="Test case $i:"
-                        THIS_RUN+=" insert_method=$insert_method"
-                        THIS_RUN+=" engine=$ENGINE"
-                        THIS_RUN+=" use_insert_token=$use_insert_token"
-                        THIS_RUN+=" single_thread=$single_thread"
-                        THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table"
-                        THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table"
-                        THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks"
-
-                        i=$((i+1))
-
-                        echo
-                        if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then
-                            echo "skip $THIS_RUN"
-                            continue
-                        fi
-                        echo "$THIS_RUN"
-
-                        $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq "
-                            $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \
-                                --insert-method $insert_method \
-                                --table-engine $ENGINE \
-                                --use-insert-token $use_insert_token \
-                                --single-thread $single_thread \
-                                --deduplicate-src-table $deduplicate_src_table \
-                                --deduplicate-dst-table $deduplicate_dst_table \
-                                --insert-unique-blocks $insert_unique_blocks \
-                                --get-logs false \
-                            )
-                        " && echo OK || echo FAIL
-                    done
-                done
-            done
-        done
-    done
-done
-
-echo
-echo "All cases executed"
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference
deleted file mode 100644
index 6e76ec46aa8..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference
+++ /dev/null
@@ -1,962 +0,0 @@
- -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True
deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False 
-table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 
10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 
-table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh
deleted file mode 100755
index 7d4f5240cd1..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
-
-ENGINE="MergeTree"
-
-RUN_ONLY=""
-#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True"
-
-i=0
-for insert_method in "InsertSelect" "InsertValues"; do
-    for use_insert_token in "True" "False"; do
-        for single_thread in "True" "False"; do
-            for deduplicate_src_table in "True" "False"; do
-                for deduplicate_dst_table in "True" "False"; do
-                    for insert_unique_blocks in "True" "False"; do
-
-                        THIS_RUN="Test case $i:"
-                        THIS_RUN+=" insert_method=$insert_method"
-                        THIS_RUN+=" engine=$ENGINE"
-                        THIS_RUN+=" use_insert_token=$use_insert_token"
-                        THIS_RUN+=" single_thread=$single_thread"
-                        THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table"
-                        THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table"
-                        THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks"
-
-                        i=$((i+1))
-
-                        echo
-                        if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then
-                            echo "skip $THIS_RUN"
-                            continue
-                        fi
-                        echo "$THIS_RUN"
-
-                        $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq "
-                            $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \
-                                --insert-method $insert_method \
-                                --table-engine $ENGINE \
-                                --use-insert-token $use_insert_token \
-                                --single-thread $single_thread \
-                                --deduplicate-src-table $deduplicate_src_table \
-                                --deduplicate-dst-table $deduplicate_dst_table \
-                                --insert-unique-blocks $insert_unique_blocks \
-                                --get-logs false \
-                            )
-                        " && echo OK || echo FAIL
-                    done
-                done
-            done
-        done
-    done
-done
-
-echo
-echo "All cases executed"
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference
deleted file mode 100644
index a25e8713c61..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference
+++ /dev/null
@@ -1,962 +0,0 @@
- -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined
-count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 2
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 2
-0
-0
-OK
-
-Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-OK
-
-Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 10
-0
-0
-OK
-
-Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-OK
-
-Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 10
-0
-0
-OK
-
-Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 2
-0
-0
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 2
-0
-0
-OK
-
-Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 2
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 2
-0
-0
-OK
-
-Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 2
-0
-0
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 2
-0
-0
-OK
-
-Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 1
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 9
-0
-0
-OK
-
-Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 2
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 2
-0
-0
-OK
-
-Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 9
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 18
-0
-0
-OK
-
-Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_a_b
-count 5
-table_when_b_even_and_joined
-count 10
-0
-0
-table_a_b
-count 10
-table_when_b_even_and_joined
-count 20
-0
-0
-OK
-
-All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh
deleted file mode 100755
index 109d1674f3a..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference deleted file mode 100644 index b6a3e0175a7..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh
deleted file mode 100755
index fe3d610a758..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference deleted file mode 100644 index 1921103f49e..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True 
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 16
-0
-0
-OK
-
-Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 8
-table_dst count 32
-0
-0
-OK
-
-Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 16
-0
-0
-OK
-
-Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 6
-0
-0
-OK
-
-Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 1
-table_dst count 2
-0
-0
-table_src count 1
-table_dst count 2
-0
-0
-OK
-
-Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 8
-table_dst count 12
-0
-0
-OK
-
-Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 1
-table_dst count 16
-0
-0
-table_src count 1
-table_dst count 32
-0
-0
-OK
-
-Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 6
-0
-0
-OK
-
-Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False
-table_src count 8
-table_dst count 2
-0
-0
-table_src count 16
-table_dst count 2
-0
-0
-OK
-
-Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True
-table_src count 8
-table_dst count 6
-0
-0
-table_src count 16
-table_dst count 12
-0
-0
-OK
-
-Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False
-table_src count 8
-table_dst count 16
-0
-0
-table_src count 16
-table_dst count 32
-0
-0
-OK
-
-All cases executed
diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh
deleted file mode 100755
index 9adee6d53d4..00000000000
--- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env bash
-# Tags: long, no-fasttest, no-parallel
-
-CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CURDIR"/../shell_config.sh
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference new file mode 100644 index 00000000000..202e4557a33 --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference @@ -0,0 +1,2 @@ +query +String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql new file mode 100644 index 00000000000..b26096f7f0e --- /dev/null +++ b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql @@ -0,0 +1,18 @@ + +SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; + +SELECT * FROM fuzzQuery('SELECT * +FROM ( + SELECT + ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, + count() + FROM numbers(3) + GROUP BY item_id WITH TOTALS +) AS l FULL JOIN ( + SELECT + ([toString((number % 2) * 2)] :: Array(String)) AS item_id + FROM numbers(3) +) AS r +ON l.item_id = r.item_id +ORDER BY 1,2,3; +', 500, 8956) LIMIT 10 FORMAT NULL; diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index cedb651a430..1e6bfb414d8 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o StorageFileSink | wc -l +" | grep -o MaterializingTransform | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference 
b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..a9c785d1e48 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -2,525 +2,525 @@ Memory initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 4 UInt64 7 String 8 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N 
\N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 
\N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree compact initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 
str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 
\N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree wide initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 
17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N 
\N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference index 18a181464e9..f7c00bd8c44 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -2,181 +2,181 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] 
[] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] 
[] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference index b1ea186a917..0dab4ea0d20 100644 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -2,55 +2,55 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N 
\N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh new file mode 100755 index 00000000000..57060b8c525 --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# shellcheck disable=SC2155 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +urls=( + "http://www.example.com" + "https://secure.example.com" + "http://example.com" + "https://www.example.org" + "https://subdomain.example.com" + "http://sub.sub.example.com" + "http://192.168.1.1" + "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]" + "http://example.com:8080" + "https://example.com:443" + "http://example.com/path/to/page.html" + "https://example.com/path/with/trailing/slash/" + "http://example.com/search?q=query&lang=en" + "https://example.com/path?param1=value1¶m2=value2" + "http://example.com/page.html#section1" + "https://example.com/document.pdf#page=10" + "http://user:password@example.com" + "https://user@example.com" + "https://user:pass@sub.example.com:8080/path/page.html?query=123#fragment" + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=encode+this" + "http://例子.测试" + "https://mañana.com" + "http://example.com/%E2%82%AC" + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" + "file:///C:/path/to/file.txt" + "file:///home/user/document.pdf" + "ftp://ftp.example.com/pub/file.zip" + "ftps://secure-ftp.example.com/private/doc.pdf" + "mailto:user@example.com" + "mailto:user@example.com?subject=Hello&body=How%20are%20you" + "git://github.com/user/repo.git" + "ssh://user@host.xz:port/path/to/repo.git" + "https://example.com/path(1)/[2]/{3}" + "http://example.com/path;param?query,value" + "" + "http://" + "example.com" + "http:" + "//" + "?query=value" + "#fragment" + "http://?#" + "http://xn--bcher-kva.ch" + "https://xn--bcher-kva.xn--tckwe/xn--8ws00zhy3a/%E6%B8%AC%E8%A9%A6.php?xn--o39an51a5phao35a=xn--mgbh0fb&xn--fiq228c5hs=test" + "https://xn--3e0b707e.xn--79-8kcre8v3a/%ED%85%8C%EC%8A%A4%ED%8A%B8/%ED%8C%8C%EC%9D%BC.jsp?xn--i1b6b1a6a2e=xn--9t4b11yi5a&xn--3e0b707e=xn--80aaa1cbgbm" + "https://example.com/path?param=value&special=!@#$%^&*()" + + "http://example.com/path/with/~tilde" + "https://example.com/path/with/\`backtick\`" + + "https://example.com/path?param1=value1¶m2=value2¶m3=value3#section1#section2" + "http://example.com/page?q1=v1&q2=v2#frag1#frag2#frag3" + + "https://example.com/☃/snowman" + "http://example.com/path/⽇本語" + "https://example.com/ü/ñ/path?q=ç" + + "https://example.com/path/to/very/long/url/that/exceeds/two/hundred/and/fifty/five/characters/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam/quis/nostrud/exercitation/ullamco/laboris/nisi/ut/aliquip/ex/ea/commodo/consequat" + + "https://example.com//path///to//file" + "http://example.com/path?param1=value1&¶m2=value2&&¶m3=value3" + + "http://example.com/%70%61%74%68?%70%61%72%61%6d=%76%61%6c%75%65#%66%72%61%67%6d%65%6e%74" + + "HtTpS://ExAmPlE.cOm/PaTh" + "http://EXAMPLE.COM/PATH" + + "http://127.0.0.1:8080/path" + "https://[::1]/path" + "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/path" + + "http://example.com:65535/path" + "https://example.com:0/path" + + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg==" + + "https://user:password@example.com:8080/path?query=value#fragment" + "ftp://anonymous:password@ftp.example.com/pub/" + + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=query%20with%20spaces" + + "https://www.mañana.com/path" + "http://例子.测试/path" + "https://рм.рф/path" + + "https://user:pass@sub.example.com:8080/p/a/t/h?query=123&key=value#fragid1" + + "jdbc:mysql://localhost:3306/database" + "market://details?id=com.example.app" + 
"tel:+1-816-555-1212" + "sms:+18165551212" + + "http://[1080:0:0:0:8:800:200C:417A]/index.html" + "https://[2001:db8::1428:57ab]:8080/path" + + "http://.." + "http://../" + "http://??" + "http://??/" + "http:///a" + "http://example.com??" + "http://example.com??/" + "foo://example.com:8042/over/there?name=ferret#nose" + "//example.com/path" +) + + +base64URLEncode() { + echo -n "$1" | base64 -w0 | tr '+/' '-_' | tr -d '=' +} + +base64URLDecode() { + local len=$((${#1} % 4)) + local result="$1" + if [ $len -eq 2 ]; then result="$1"'==' + elif [ $len -eq 3 ]; then result="$1"'=' + fi + echo "$result" | tr '_-' '/+' | base64 -w0 -d +} + +test() { + local input="$1" + local encode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_gold=$(base64URLEncode $input) + + local decode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_gold=$(base64URLDecode $encode_gold) + + if [ "$encode_ch" != "$encode_gold" ]; then + echo "Input: $input" + echo "Expected: $encode_gold" + echo "Got: $encode_ch" + fi + + if [ "$decode_ch" != "$input" ] || [ "$decode_ch" != "$decode_gold" ]; then + echo "Input: $input" + echo "Decode gold: $decode_gold" + echo "Got: $decode_ch" + fi +} + + +for url in "${urls[@]}"; do + test "$url" +done + +# special case for ' +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('http://example.com/!$&\'()*+,;=:@/path'))") +if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then + echo "Special case fail" + echo "Got: $decode" +fi diff --git a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh index 8d74ebe1039..22a2fd82c64 100755 --- a/tests/queries/0_stateless/03172_error_log_table_not_empty.sh +++ b/tests/queries/0_stateless/03172_error_log_table_not_empty.sh @@ -1,9 +1,14 @@ #!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: this test relies on the timeouts, it always takes no less that 4 seconds to run CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# system.error_log is created lazy, flush logs query makes it sure that the table is created. 
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS;" + # Get the previous number of errors for 111, 222 and 333 errors_111=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 111") errors_222=$($CLICKHOUSE_CLIENT -q "SELECT sum(value) FROM system.error_log WHERE code = 222") @@ -38,4 +43,4 @@ $CLICKHOUSE_CLIENT -mn -q " SELECT sum(value) > $(($errors_111+1)) FROM system.error_log WHERE code = 111; SELECT sum(value) > $(($errors_222+1)) FROM system.error_log WHERE code = 222; SELECT sum(value) > $(($errors_333+1)) FROM system.error_log WHERE code = 333; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference new file mode 100644 index 00000000000..33b8cd6ee26 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -0,0 +1,3 @@ +-- bitShiftRight +-- bitShiftLeft +OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql new file mode 100644 index 00000000000..aec01753673 --- /dev/null +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -0,0 +1,17 @@ +SELECT '-- bitShiftRight'; +SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT '-- bitShiftLeft'; +SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } + +SELECT 'OK'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference new file mode 100644 index 00000000000..fcd78da1283 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference @@ -0,0 +1,2 @@ +1000000 +1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql new file mode 100644 index 00000000000..25798ef6d33 --- /dev/null +++ b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS data_02051__fuzz_24; + +CREATE TABLE data_02051__fuzz_24 (`key` Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +detach table data_02051__fuzz_24; 
+attach table data_02051__fuzz_24; + +SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); + +DROP TABLE data_02051__fuzz_24; diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.reference b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.sql b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql new file mode 100644 index 00000000000..3cb551804b7 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS 03199_fixedstring_array; +CREATE TABLE 03199_fixedstring_array (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory; +INSERT INTO 03199_fixedstring_array VALUES (['a', 'b']), (['c', 'd']); + +SELECT has(arr, toFixedString(materialize('a'), 1)) FROM 03199_fixedstring_array; + +DROP TABLE 03199_fixedstring_array; diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.reference b/tests/queries/0_stateless/03199_merge_filters_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql new file mode 100644 index 00000000000..ed2ec2ea217 --- /dev/null +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -0,0 +1,70 @@ +drop table if exists t1; +drop table if exists t2; + +CREATE TABLE t1 +( + `s1` String, + `s2` String, + `s3` String +) +ENGINE = MergeTree +ORDER BY tuple(); + + +CREATE TABLE t2 +( + `fs1` FixedString(10), + `fs2` FixedString(10) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t1 SELECT + repeat('t', 15) s1, + 'test' s2, + 'test' s3; + +INSERT INTO t1 SELECT + substring(s1, 1, 10), + s2, + s3 +FROM generateRandom('s1 String, s2 String, s3 String') +LIMIT 10000; + +INSERT INTO t2 SELECT * +FROM generateRandom() +LIMIT 10000; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + +optimize table t1 final; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')); diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference new file mode 100644 index 00000000000..10ce589000d --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference @@ -0,0 +1,27 @@ +5 (4230072075578472911,4230072075578472911) 71789584853496063 +2 (4401188181514187637,4401188181514187637) 878466845199253299 +4 (4940826638032106783,4940826638032106783) 3675164899122807807 +6 (10957420562507184961,10957420562507184961) 3732623117916254211 +0 (797076400500506358,797076400500506358) 3746094338409299772 +7 (10843611042193511775,10843611042193511775) 4607251742847087615 +3 (12588286986351526898,12588286986351526898) 13889114719560662796 +8 (452995860660674674,452995860660674674) 17365664920787500812 +9 (12206106972241516904,12206106972241516904) 17567684527097330880 +1 (14558425114501132193,14558425114501132193) 18445898820068822019 +3 255 255 +0 0 0 +0 0 0 +0 0 0 
+0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql new file mode 100644 index 00000000000..c32d7524492 --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer=1; + +SELECT *, ngramMinHash(*) AS minhash, mortonEncode(untuple(ngramMinHash(*))) AS z +FROM (SELECT toString(number) FROM numbers(10)) +ORDER BY z LIMIT 100; + +CREATE TABLE test ( + idx UInt64, + coverage Array(UInt64), + test_name String +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO test VALUES (10, [0,1,2,3], 'xx'), (20, [3,4,5,6], 'xxx'), (90, [3,4,5,6,9], 'xxxx'); + +WITH + 4096 AS w, 4096 AS h, w * h AS pixels, + arrayJoin(coverage) AS num, + num DIV (32768 * 32768 DIV pixels) AS idx, + mortonDecode(2, idx) AS coord, + 255 AS b, + least(255, uniq(test_name)) AS r, + 255 * uniq(test_name) / (max(uniq(test_name)) OVER ()) AS g +SELECT r::UInt8, g::UInt8, b::UInt8 +FROM test +GROUP BY coord +ORDER BY coord.2 * w + coord.1 +WITH FILL FROM 0 TO 10; + + +CREATE TABLE seq ( + number UInt64 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO seq VALUES (0), (6), (7); + +WITH (Select min(number), max(number) from seq) as range Select * from numbers(range.1, range.2); + diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference b/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh new file mode 100755 index 00000000000..337debebb14 --- /dev/null +++ b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +# check for buffer overflow in unbin (due to not enough memory preallocate for output buffer) +# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference new file mode 100644 index 00000000000..6d2c1334d6e --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.reference @@ -0,0 +1,10 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql new file mode 100644 index 00000000000..a01a595dbb5 --- /dev/null +++ b/tests/queries/0_stateless/03200_memory_engine_alter_dynamic.sql @@ -0,0 +1,7 @@ +set allow_experimental_dynamic_type=1; +create table test (d Dynamic) engine=Memory; +insert into table test select * from numbers(5); +alter table test modify column d Dynamic(max_types=1); +select d.UInt64 from test settings allow_experimental_analyzer=1; +select d.UInt64 from test settings allow_experimental_analyzer=0; + diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference b/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference new file mode 100644 index 00000000000..50e44edaecb --- /dev/null +++ b/tests/queries/0_stateless/03201_query_cache_empty_tuple.reference @@ -0,0 +1,2 @@ +() 0 +() 0 diff --git a/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql b/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql new file mode 100644 index 00000000000..8e133143ef8 --- /dev/null +++ b/tests/queries/0_stateless/03201_query_cache_empty_tuple.sql @@ -0,0 +1,2 @@ +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference new file mode 100644 index 00000000000..8565fe3d0fa --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -0,0 +1,402 @@ +Memory +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 
[] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree compact +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree wide +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 
[] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh new file mode 100755 index 00000000000..8231691e184 --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" + + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference new file mode 100644 index 00000000000..8740726c7ef --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree 
diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference
new file mode 100644
index 00000000000..8740726c7ef
--- /dev/null
+++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference
@@ -0,0 +1,57 @@
+Memory
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+20
+20
+20
+20
+0
+0
+20
+20
+10
+10
+20
+0
+MergeTree compact
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+20
+20
+20
+20
+0
+0
+20
+20
+10
+10
+20
+0
+MergeTree wide
+test
+Array(Array(Dynamic))
+Array(Variant(String, UInt64))
+None
+String
+UInt64
+20
+20
+20
+20
+0
+0
+20
+20
+10
+10
+20
+0
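
For context (not part of the patch): the counts above come from the Dynamic test below; d.<TypeName> reads a typed subcolumn that is NULL wherever the stored type differs, which is why `count() where d.UInt64 is not NULL` matches `count() where dynamicType(d) == 'UInt64'`. A minimal sketch, assuming the experimental Dynamic type is enabled (the table name `td` is hypothetical):

    SET allow_experimental_dynamic_type = 1;
    CREATE TABLE td (d Dynamic) ENGINE = Memory;
    INSERT INTO td VALUES (42), ('str'), (NULL);
    -- dynamicType(d) names the stored type ('None' for NULL);
    -- d.UInt64 is a Nullable(UInt64) view of the UInt64 rows,
    -- and d.UInt64.null is its null map.
    SELECT d, dynamicType(d), d.UInt64, d.UInt64.null FROM td;
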
"select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 71ddd681581..85ecd0b9a71 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8839168 1079 -test hits 1 835584 102 +test hits 1 8832938 1079 +test hits 1 829354 102 test hits 1 8003584 977 test hits 2 581632 71 diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 68734ef5ec8..161f59dc3c9 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1098,6 +1098,8 @@ aggregatefunction aggregatingmergetree aggregatio aggretate +aggthrow +aggThrow aiochclient allocator alphaTokens @@ -1658,6 +1660,7 @@ fuzzBits fuzzJSON fuzzer fuzzers +fuzzQuery gRPC gccMurmurHash gcem @@ -1979,6 +1982,8 @@ mapExtractKeyLike mapFilter mapFromArrays mapKeys +mapPartialReverseSort +mapPartialSort mapPopulateSeries mapReverseSort mapSort diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 5ae4c7a0b1c..587e015b340 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1311,9 +1311,9 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa while (!children_span.empty()) { Coordination::Requests ops; - for (size_t i = 0; i < 1000 && !children.empty(); ++i) + for (size_t i = 0; i < 1000 && !children_span.empty(); ++i) { - removeRecursive(zookeeper, fs::path(path) / children.back()); + removeRecursive(zookeeper, fs::path(path) / children_span.back()); ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); children_span = children_span.subspan(0, children_span.size() - 1); } diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index e800e923482..c3a62a01eac 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -18,45 +18,46 @@ 
diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml
index e800e923482..c3a62a01eac 100644
--- a/utils/keeper-bench/example.yaml
+++ b/utils/keeper-bench/example.yaml
@@ -18,45 +18,46 @@
 connections:
   host: "localhost:9181"
 
-generator:
-  setup:
+setup:
+  node:
+    name: "test3"
+  node:
+    name: "test_create"
+  node:
+    name: "test4"
+  node:
+    name: "test"
+    data: "somedata"
   node:
-    name: "test3"
+    repeat: 4
+    name:
+      random_string:
+        size: 15
+    data:
+      random_string:
+        size:
+          min_value: 10
+          max_value: 20
   node:
-    name: "test_create"
-  node:
-    name: "test4"
-  node:
-    name: "test"
-    data: "somedata"
-  node:
-    repeat: 4
-    name:
-      random_string:
-        size: 15
-    data:
-      random_string:
-        size:
-          min_value: 10
-          max_value: 20
+    repeat: 2
     node:
       repeat: 2
-      node:
-        repeat: 2
-        name:
-          random_string:
-            size: 12
       name:
        random_string:
-          size: 15
-      data:
-        random_string:
-          size:
-            min_value: 10
-            max_value: 20
-  node:
-    name: "test2"
-    data: "somedata"
+          size: 12
+      name:
+        random_string:
+          size: 15
+      data:
+        random_string:
+          size:
+            min_value: 10
+            max_value: 20
+  node:
+    name: "test2"
+    data: "somedata"
+
+generator:
   requests:
     create:
       path: "/test_create"
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 8112ed9083b..271065a78fb 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
@@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01
 v24.4.3.25-stable 2024-06-14
 v24.4.2.141-stable 2024-06-07
 v24.4.1.2088-stable 2024-05-01
+v24.3.5.46-lts 2024-07-03
 v24.3.4.147-lts 2024-06-13
 v24.3.3.102-lts 2024-05-01
 v24.3.2.23-lts 2024-04-03