Merge remote-tracking branch 'origin/master' into base64encode-tests

Nikita Fomichev, 2024-07-07 10:04:13 +02:00
Commit db462a7920
388 changed files with 7181 additions and 4393 deletions

.gitattributes

@@ -2,3 +2,4 @@ contrib/* linguist-vendored
 *.h linguist-language=C++
 tests/queries/0_stateless/data_json/* binary
 tests/queries/0_stateless/*.reference -crlf
+src/Core/SettingsChangesHistory.cpp merge=union
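For context on the new attribute: git's built-in "union" merge driver resolves conflicting hunks by keeping the lines from both sides instead of emitting conflict markers, which suits an append-only registry like SettingsChangesHistory.cpp. A toy Python illustration (the entry names are made up, not from this commit):

# Toy illustration of git's "union" merge driver semantics: a conflicting
# hunk keeps both sides' lines rather than inserting <<<<<<< markers.
base   = ["entry A"]
ours   = ["entry A", "entry B"]              # our branch appended B
theirs = ["entry A", "entry C"]              # master appended C
merged = ["entry A", "entry B", "entry C"]   # union result: both additions kept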


@@ -21,6 +21,8 @@
 #include <atomic>
 #include <cstddef>
 #include <map>
+#include <memory>
+#include <unordered_map>
 #include <vector>
 #include "Poco/Channel.h"


@@ -19,6 +19,7 @@
 #include <map>
+#include <vector>
 #include "Poco/Foundation.h"
 #include "Poco/Timestamp.h"


@@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING)
         message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
     endif ()
-    message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
+    message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}")
 endif ()


@@ -228,6 +228,8 @@ add_contrib (ulid-c-cmake ulid-c)
 add_contrib (libssh-cmake libssh)
+add_contrib (prometheus-protobufs-cmake prometheus-protobufs prometheus-protobufs-gogo)
+
 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,

contrib/azure

@@ -1 +1 @@
-Subproject commit 6262a76ef4c4c330c84e58dd4f6f13f4e6230fcd
+Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf


@@ -157,15 +157,13 @@ function(protobuf_generate)
   set(_generated_srcs_all)
   foreach(_proto ${protobuf_generate_PROTOS})
-    get_filename_component(_abs_file ${_proto} ABSOLUTE)
-    get_filename_component(_abs_dir ${_abs_file} DIRECTORY)
-    get_filename_component(_basename ${_proto} NAME_WE)
-    file(RELATIVE_PATH _rel_dir ${CMAKE_CURRENT_SOURCE_DIR} ${_abs_dir})
-
-    set(_possible_rel_dir)
-    if (NOT protobuf_generate_APPEND_PATH)
-      set(_possible_rel_dir ${_rel_dir}/)
-    endif()
+    # The protobuf compiler doesn't return paths to the files it generates so we have to calculate those paths here:
+    # _abs_file - absolute path to a .proto file,
+    # _possible_rel_dir - relative path to the .proto file from some import directory specified in Protobuf_IMPORT_DIRS,
+    # _basename - filename of the .proto file (without path and without extension).
+    get_proto_absolute_path(_abs_file "${_proto}" ${_protobuf_include_path})
+    get_proto_relative_path(_possible_rel_dir "${_abs_file}" ${_protobuf_include_path})
+    get_filename_component(_basename "${_abs_file}" NAME_WE)

     set(_generated_srcs)
     foreach(_ext ${protobuf_generate_GENERATE_EXTENSIONS})
@@ -173,7 +171,7 @@ function(protobuf_generate)
     endforeach()

     if(protobuf_generate_DESCRIPTORS AND protobuf_generate_LANGUAGE STREQUAL cpp)
-      set(_descriptor_file "${CMAKE_CURRENT_BINARY_DIR}/${_basename}.desc")
+      set(_descriptor_file "${protobuf_generate_PROTOC_OUT_DIR}/${_possible_rel_dir}${_basename}.desc")
       set(_dll_desc_out "--descriptor_set_out=${_descriptor_file}")
       list(APPEND _generated_srcs ${_descriptor_file})
     endif()
@@ -196,3 +194,36 @@ function(protobuf_generate)
     target_sources(${protobuf_generate_TARGET} PRIVATE ${_generated_srcs_all})
   endif()
 endfunction()
+
+# Calculates the absolute path to a .proto file.
+function(get_proto_absolute_path result proto)
+  cmake_path(IS_ABSOLUTE proto _is_abs_path)
+  if(_is_abs_path)
+    set(${result} "${proto}" PARENT_SCOPE)
+    return()
+  endif()
+  foreach(_include_dir ${ARGN})
+    if(EXISTS "${_include_dir}/${proto}")
+      set(${result} "${_include_dir}/${proto}" PARENT_SCOPE)
+      return()
+    endif()
+  endforeach()
+  message(SEND_ERROR "Not found protobuf ${proto} in Protobuf_IMPORT_DIRS: ${ARGN}")
+endfunction()
+
+# Calculates a relative path to a .proto file. The returned path is relative to one of include directories.
+function(get_proto_relative_path result abs_path)
+  set(${result} "" PARENT_SCOPE)
+  get_filename_component(_abs_dir "${abs_path}" DIRECTORY)
+  foreach(_include_dir ${ARGN})
+    cmake_path(IS_PREFIX _include_dir "${_abs_dir}" _is_prefix)
+    if(_is_prefix)
+      file(RELATIVE_PATH _rel_dir "${_include_dir}" "${_abs_dir}")
+      if(NOT _rel_dir STREQUAL "")
+        set(${result} "${_rel_dir}/" PARENT_SCOPE)
+      endif()
+      return()
+    endif()
+  endforeach()
+  message(WARNING "Not found protobuf ${abs_path} in Protobuf_IMPORT_DIRS: ${ARGN}")
+endfunction()
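To make the control flow of the two new helpers easier to follow, here is a rough Python rendering of the same logic. It is only a sketch under POSIX path assumptions, not code from this commit; the names simply mirror the CMake variables:

import os

def get_proto_absolute_path(proto: str, import_dirs: list[str]) -> str:
    """Resolve a .proto reference against the protobuf import directories."""
    if os.path.isabs(proto):
        return proto
    for include_dir in import_dirs:
        candidate = os.path.join(include_dir, proto)
        if os.path.exists(candidate):
            return candidate
    raise FileNotFoundError(f"{proto} not found in {import_dirs}")

def get_proto_relative_path(abs_path: str, import_dirs: list[str]) -> str:
    """Directory of the .proto relative to whichever import dir contains it."""
    abs_dir = os.path.dirname(abs_path)
    for include_dir in import_dirs:
        if abs_dir == include_dir or abs_dir.startswith(include_dir + os.sep):
            rel_dir = os.path.relpath(abs_dir, include_dir)
            return "" if rel_dir == "." else rel_dir + "/"
    return ""  # the CMake version warns in this case

So "prompb/remote.proto" found under an import dir yields the relative dir "prompb/", and the generated files (including the .desc descriptor, per the hunk above) land in a matching subdirectory of the output dir instead of all being flattened into one directory.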


@@ -5,7 +5,7 @@ else ()
 endif ()

 if (NOT ENABLE_ICU)
-    message(STATUS "Not using icu")
+    message(STATUS "Not using ICU")
     return()
 endif()


@@ -34,7 +34,11 @@ if (OS_LINUX)
     # avoid spurious latencies and additional work associated with
     # MADV_DONTNEED. See
    # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
-    set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true")
+    if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
+        set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
+    else()
+        set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true")
+    endif()
 else()
     set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
 endif()
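The release branch keeps jemalloc's profiler compiled in (prof:true) but dormant (prof_active:false). Assuming standard jemalloc behavior, the baked-in default can still be overridden per process through the MALLOC_CONF environment variable; a hypothetical sketch, not part of this commit:

import os
import subprocess

# Launch with the profiler active from the start; MALLOC_CONF overrides the
# compiled-in JEMALLOC_CONFIG_MALLOC_CONF default shown above.
env = dict(os.environ, MALLOC_CONF="prof:true,prof_active:true")
subprocess.run(["./clickhouse-server"], env=env, check=False)  # illustrative path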

@@ -1 +1 @@
-Subproject commit d2142eed98046a47ff7112e3cc1e197c8a5cd80f
+Subproject commit 2a8967b60cbe5bc2df253712bac343cc5263c5fc

contrib/openssl

@@ -1 +1 @@
-Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c
+Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08


@@ -0,0 +1,34 @@
option(ENABLE_PROMETHEUS_PROTOBUFS "Enable Prometheus Protobufs" ${ENABLE_PROTOBUF})

if(NOT ENABLE_PROMETHEUS_PROTOBUFS)
    message(STATUS "Not using prometheus-protobufs")
    return()
endif()

set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
set(Prometheus_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs")
set(GogoProto_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/prometheus-protobufs-gogo")

# Protobuf_IMPORT_DIRS specify where the protobuf compiler will look for .proto files.
set(Old_Protobuf_IMPORT_DIRS ${Protobuf_IMPORT_DIRS})
list(APPEND Protobuf_IMPORT_DIRS "${Protobuf_INCLUDE_DIR}" "${Prometheus_INCLUDE_DIR}" "${GogoProto_INCLUDE_DIR}")

PROTOBUF_GENERATE_CPP(prometheus_protobufs_sources prometheus_protobufs_headers
    "prompb/remote.proto"
    "prompb/types.proto"
    "gogoproto/gogo.proto"
)

set(Protobuf_IMPORT_DIRS ${Old_Protobuf_IMPORT_DIRS})

# Ignore warnings while compiling protobuf-generated *.pb.h and *.pb.cpp files.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")

# Disable clang-tidy for protobuf-generated *.pb.h and *.pb.cpp files.
set (CMAKE_CXX_CLANG_TIDY "")

add_library(_prometheus_protobufs ${prometheus_protobufs_sources} ${prometheus_protobufs_headers})
target_include_directories(_prometheus_protobufs SYSTEM PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries (_prometheus_protobufs PUBLIC ch_contrib::protobuf)

add_library (ch_contrib::prometheus_protobufs ALIAS _prometheus_protobufs)
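Outside of CMake, the PROTOBUF_GENERATE_CPP call above corresponds roughly to the following protoc invocation; a sketch only, with paths assuming a ClickHouse checkout as the working directory:

import subprocess

import_dirs = [
    "contrib/google-protobuf/src",
    "contrib/prometheus-protobufs",
    "contrib/prometheus-protobufs-gogo",
]
cmd = ["protoc", "--cpp_out=."]
cmd += [f"-I{d}" for d in import_dirs]       # mirrors Protobuf_IMPORT_DIRS
cmd += ["prompb/remote.proto", "prompb/types.proto", "gogoproto/gogo.proto"]
subprocess.run(cmd, check=True)              # emits prompb/*.pb.{h,cc}, gogoproto/gogo.pb.{h,cc}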


@@ -0,0 +1,35 @@
Copyright (c) 2022, The Cosmos SDK Authors. All rights reserved.
Copyright (c) 2013, The GoGo Authors. All rights reserved.
Protocol Buffers for Go with Gadgets
Go support for Protocol Buffers - Google's data interchange format
Copyright 2010 The Go Authors. All rights reserved.
https://github.com/golang/protobuf
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,4 @@
File "gogoproto/gogo.proto" was downloaded from the "Protocol Buffers for Go with Gadgets" project:
https://github.com/cosmos/gogoproto/blob/main/gogoproto/gogo.proto
File "gogoproto/gogo.proto" is used in ClickHouse to compile prometheus protobufs.


@@ -0,0 +1,145 @@
// Protocol Buffers for Go with Gadgets
//
// Copyright (c) 2013, The GoGo Authors. All rights reserved.
// http://github.com/cosmos/gogoproto
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto2";
package gogoproto;
import "google/protobuf/descriptor.proto";
option java_package = "com.google.protobuf";
option java_outer_classname = "GoGoProtos";
option go_package = "github.com/cosmos/gogoproto/gogoproto";
extend google.protobuf.EnumOptions {
optional bool goproto_enum_prefix = 62001;
optional bool goproto_enum_stringer = 62021;
optional bool enum_stringer = 62022;
optional string enum_customname = 62023;
optional bool enumdecl = 62024;
}
extend google.protobuf.EnumValueOptions {
optional string enumvalue_customname = 66001;
}
extend google.protobuf.FileOptions {
optional bool goproto_getters_all = 63001;
optional bool goproto_enum_prefix_all = 63002;
optional bool goproto_stringer_all = 63003;
optional bool verbose_equal_all = 63004;
optional bool face_all = 63005;
optional bool gostring_all = 63006;
optional bool populate_all = 63007;
optional bool stringer_all = 63008;
optional bool onlyone_all = 63009;
optional bool equal_all = 63013;
optional bool description_all = 63014;
optional bool testgen_all = 63015;
optional bool benchgen_all = 63016;
optional bool marshaler_all = 63017;
optional bool unmarshaler_all = 63018;
optional bool stable_marshaler_all = 63019;
optional bool sizer_all = 63020;
optional bool goproto_enum_stringer_all = 63021;
optional bool enum_stringer_all = 63022;
optional bool unsafe_marshaler_all = 63023;
optional bool unsafe_unmarshaler_all = 63024;
optional bool goproto_extensions_map_all = 63025;
optional bool goproto_unrecognized_all = 63026;
optional bool gogoproto_import = 63027;
optional bool protosizer_all = 63028;
optional bool compare_all = 63029;
optional bool typedecl_all = 63030;
optional bool enumdecl_all = 63031;
optional bool goproto_registration = 63032;
optional bool messagename_all = 63033;
optional bool goproto_sizecache_all = 63034;
optional bool goproto_unkeyed_all = 63035;
}
extend google.protobuf.MessageOptions {
optional bool goproto_getters = 64001;
optional bool goproto_stringer = 64003;
optional bool verbose_equal = 64004;
optional bool face = 64005;
optional bool gostring = 64006;
optional bool populate = 64007;
optional bool stringer = 67008;
optional bool onlyone = 64009;
optional bool equal = 64013;
optional bool description = 64014;
optional bool testgen = 64015;
optional bool benchgen = 64016;
optional bool marshaler = 64017;
optional bool unmarshaler = 64018;
optional bool stable_marshaler = 64019;
optional bool sizer = 64020;
optional bool unsafe_marshaler = 64023;
optional bool unsafe_unmarshaler = 64024;
optional bool goproto_extensions_map = 64025;
optional bool goproto_unrecognized = 64026;
optional bool protosizer = 64028;
optional bool compare = 64029;
optional bool typedecl = 64030;
optional bool messagename = 64033;
optional bool goproto_sizecache = 64034;
optional bool goproto_unkeyed = 64035;
}
extend google.protobuf.FieldOptions {
optional bool nullable = 65001;
optional bool embed = 65002;
optional string customtype = 65003;
optional string customname = 65004;
optional string jsontag = 65005;
optional string moretags = 65006;
optional string casttype = 65007;
optional string castkey = 65008;
optional string castvalue = 65009;
optional bool stdtime = 65010;
optional bool stdduration = 65011;
optional bool wktpointer = 65012;
optional string castrepeated = 65013;
}


@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -0,0 +1,2 @@
Files "prompb/remote.proto" and "prompb/types.proto" were downloaded from the Prometheus repository:
https://github.com/prometheus/prometheus/tree/main/prompb


@@ -0,0 +1,88 @@
// Copyright 2016 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
import "prompb/types.proto";
import "gogoproto/gogo.proto";
message WriteRequest {
repeated prometheus.TimeSeries timeseries = 1 [(gogoproto.nullable) = false];
// Cortex uses this field to determine the source of the write request.
// We reserve it to avoid any compatibility issues.
reserved 2;
repeated prometheus.MetricMetadata metadata = 3 [(gogoproto.nullable) = false];
}
// ReadRequest represents a remote read request.
message ReadRequest {
repeated Query queries = 1;
enum ResponseType {
// Server will return a single ReadResponse message with matched series that includes list of raw samples.
// It's recommended to use streamed response types instead.
//
// Response headers:
// Content-Type: "application/x-protobuf"
// Content-Encoding: "snappy"
SAMPLES = 0;
// Server will stream a delimited ChunkedReadResponse message that
// contains XOR or HISTOGRAM(!) encoded chunks for a single series.
// Each message is following varint size and fixed size bigendian
// uint32 for CRC32 Castagnoli checksum.
//
// Response headers:
// Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
// Content-Encoding: ""
STREAMED_XOR_CHUNKS = 1;
}
// accepted_response_types allows negotiating the content type of the response.
//
// Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is
// implemented by server, error is returned.
// For request that do not contain `accepted_response_types` field the SAMPLES response type will be used.
repeated ResponseType accepted_response_types = 2;
}
// ReadResponse is a response when response_type equals SAMPLES.
message ReadResponse {
// In same order as the request's queries.
repeated QueryResult results = 1;
}
message Query {
int64 start_timestamp_ms = 1;
int64 end_timestamp_ms = 2;
repeated prometheus.LabelMatcher matchers = 3;
prometheus.ReadHints hints = 4;
}
message QueryResult {
// Samples within a time series must be ordered by time.
repeated prometheus.TimeSeries timeseries = 1;
}
// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS.
// We strictly stream full series after series, optionally split by time. This means that a single frame can contain
// partition of the single series, but once a new series is started to be streamed it means that no more chunks will
// be sent for previous one. Series are returned sorted in the same way TSDB block are internally.
message ChunkedReadResponse {
repeated prometheus.ChunkedSeries chunked_series = 1;
// query_index represents an index of the query from ReadRequest.queries these chunks relates to.
int64 query_index = 2;
}
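For orientation, a minimal remote-write client sketch against the messages above. It assumes the .proto files have been compiled with protoc --python_out (the prompb.remote_pb2 module path is an assumption about your layout, not something this commit provides) and that the requests and python-snappy packages are installed; the endpoint URL is made up:

import time

import requests
import snappy  # python-snappy
from prompb import remote_pb2  # protoc-generated; module path is an assumption

req = remote_pb2.WriteRequest()
series = req.timeseries.add()
series.labels.add(name="__name__", value="demo_metric")
sample = series.samples.add()
sample.value = 42.0
sample.timestamp = int(time.time() * 1000)  # milliseconds, per types.proto

requests.post(
    "http://localhost:9201/write",  # hypothetical remote-write endpoint
    data=snappy.compress(req.SerializeToString()),
    headers={
        "Content-Type": "application/x-protobuf",
        "Content-Encoding": "snappy",
        "X-Prometheus-Remote-Write-Version": "0.1.0",
    },
    timeout=10,
)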


@@ -0,0 +1,187 @@
// Copyright 2017 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
import "gogoproto/gogo.proto";
message MetricMetadata {
enum MetricType {
UNKNOWN = 0;
COUNTER = 1;
GAUGE = 2;
HISTOGRAM = 3;
GAUGEHISTOGRAM = 4;
SUMMARY = 5;
INFO = 6;
STATESET = 7;
}
// Represents the metric type, these match the set from Prometheus.
// Refer to github.com/prometheus/common/model/metadata.go for details.
MetricType type = 1;
string metric_family_name = 2;
string help = 4;
string unit = 5;
}
message Sample {
double value = 1;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 2;
}
message Exemplar {
// Optional, can be empty.
repeated Label labels = 1 [(gogoproto.nullable) = false];
double value = 2;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 3;
}
// A native histogram, also known as a sparse histogram.
// Original design doc:
// https://docs.google.com/document/d/1cLNv3aufPZb3fNfaJgdaRBZsInZKKIHo9E6HinJVbpM/edit
// The appendix of this design doc also explains the concept of float
// histograms. This Histogram message can represent both, the usual
// integer histogram as well as a float histogram.
message Histogram {
enum ResetHint {
UNKNOWN = 0; // Need to test for a counter reset explicitly.
YES = 1; // This is the 1st histogram after a counter reset.
NO = 2; // There was no counter reset between this and the previous Histogram.
GAUGE = 3; // This is a gauge histogram where counter resets don't happen.
}
oneof count { // Count of observations in the histogram.
uint64 count_int = 1;
double count_float = 2;
}
double sum = 3; // Sum of observations in the histogram.
// The schema defines the bucket schema. Currently, valid numbers
// are -4 <= n <= 8. They are all for base-2 bucket schemas, where 1
// is a bucket boundary in each case, and then each power of two is
// divided into 2^n logarithmic buckets. Or in other words, each
// bucket boundary is the previous boundary times 2^(2^-n). In the
// future, more bucket schemas may be added using numbers < -4 or >
// 8.
sint32 schema = 4;
double zero_threshold = 5; // Breadth of the zero bucket.
oneof zero_count { // Count in zero bucket.
uint64 zero_count_int = 6;
double zero_count_float = 7;
}
// Negative Buckets.
repeated BucketSpan negative_spans = 8 [(gogoproto.nullable) = false];
// Use either "negative_deltas" or "negative_counts", the former for
// regular histograms with integer counts, the latter for float
// histograms.
repeated sint64 negative_deltas = 9; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
repeated double negative_counts = 10; // Absolute count of each bucket.
// Positive Buckets.
repeated BucketSpan positive_spans = 11 [(gogoproto.nullable) = false];
// Use either "positive_deltas" or "positive_counts", the former for
// regular histograms with integer counts, the latter for float
// histograms.
repeated sint64 positive_deltas = 12; // Count delta of each bucket compared to previous one (or to zero for 1st bucket).
repeated double positive_counts = 13; // Absolute count of each bucket.
ResetHint reset_hint = 14;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 15;
}
// A BucketSpan defines a number of consecutive buckets with their
// offset. Logically, it would be more straightforward to include the
// bucket counts in the Span. However, the protobuf representation is
// more compact in the way the data is structured here (with all the
// buckets in a single array separate from the Spans).
message BucketSpan {
sint32 offset = 1; // Gap to previous span, or starting point for 1st span (which can be negative).
uint32 length = 2; // Length of consecutive buckets.
}
// TimeSeries represents samples and labels for a single time series.
message TimeSeries {
// For a timeseries to be valid, and for the samples and exemplars
// to be ingested by the remote system properly, the labels field is required.
repeated Label labels = 1 [(gogoproto.nullable) = false];
repeated Sample samples = 2 [(gogoproto.nullable) = false];
repeated Exemplar exemplars = 3 [(gogoproto.nullable) = false];
repeated Histogram histograms = 4 [(gogoproto.nullable) = false];
}
message Label {
string name = 1;
string value = 2;
}
message Labels {
repeated Label labels = 1 [(gogoproto.nullable) = false];
}
// Matcher specifies a rule, which can match or set of labels or not.
message LabelMatcher {
enum Type {
EQ = 0;
NEQ = 1;
RE = 2;
NRE = 3;
}
Type type = 1;
string name = 2;
string value = 3;
}
message ReadHints {
int64 step_ms = 1; // Query step size in milliseconds.
string func = 2; // String representation of surrounding function or aggregation.
int64 start_ms = 3; // Start time in milliseconds.
int64 end_ms = 4; // End time in milliseconds.
repeated string grouping = 5; // List of label names used in aggregation.
bool by = 6; // Indicate whether it is without or by.
int64 range_ms = 7; // Range vector selector range in milliseconds.
}
// Chunk represents a TSDB chunk.
// Time range [min, max] is inclusive.
message Chunk {
int64 min_time_ms = 1;
int64 max_time_ms = 2;
// We require this to match chunkenc.Encoding.
enum Encoding {
UNKNOWN = 0;
XOR = 1;
HISTOGRAM = 2;
FLOAT_HISTOGRAM = 3;
}
Encoding type = 3;
bytes data = 4;
}
// ChunkedSeries represents single, encoded time series.
message ChunkedSeries {
// Labels should be sorted.
repeated Label labels = 1 [(gogoproto.nullable) = false];
// Chunks will be in start time order and may overlap.
repeated Chunk chunks = 2 [(gogoproto.nullable) = false];
}
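The bucket-schema comment in Histogram is easiest to see with numbers: with schema n, consecutive bucket boundaries differ by a factor of 2^(2^-n), so schema 0 doubles each bucket while schema 3 splits every power of two into 8 buckets. A quick check in Python (plain arithmetic, not Prometheus library code):

def bucket_upper_bound(index: int, schema: int) -> float:
    # Upper bound of the index-th positive bucket above 1.0 for a given schema.
    return 2.0 ** (index * 2.0 ** -schema)

print([bucket_upper_bound(i, 0) for i in (1, 2, 3)])  # [2.0, 4.0, 8.0]
print(round(bucket_upper_bound(1, 3), 4))             # 1.0905, i.e. 2**(1/8)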

contrib/s2geometry

@@ -1 +1 @@
-Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b
+Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43


@@ -1,7 +1,7 @@
-option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES})
+option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})

 if (NOT ENABLE_S2_GEOMETRY)
-    message(STATUS "Not using S2 geometry")
+    message(STATUS "Not using S2 Geometry")
     return()
 endif()
@@ -38,6 +38,7 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2cell_index.cc"
     "${S2_SOURCE_DIR}/s2/s2cell_union.cc"
     "${S2_SOURCE_DIR}/s2/s2centroids.cc"
+    "${S2_SOURCE_DIR}/s2/s2chain_interpolation_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_cell_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2closest_point_query.cc"
@@ -46,6 +47,7 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2coords.cc"
     "${S2_SOURCE_DIR}/s2/s2crossing_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2debug.cc"
+    "${S2_SOURCE_DIR}/s2/s2density_tree.cc"
     "${S2_SOURCE_DIR}/s2/s2earth.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_clipping.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_crosser.cc"
@@ -53,8 +55,10 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
     "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
     "${S2_SOURCE_DIR}/s2/s2error.cc"
+    "${S2_SOURCE_DIR}/s2/s2fractal.cc"
     "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
     "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
+    "${S2_SOURCE_DIR}/s2/s2index_cell_data.cc"
     "${S2_SOURCE_DIR}/s2/s2latlng.cc"
     "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
     "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
@@ -63,10 +67,10 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2lax_polyline_shape.cc"
     "${S2_SOURCE_DIR}/s2/s2loop.cc"
     "${S2_SOURCE_DIR}/s2/s2loop_measures.cc"
+    "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc"
     "${S2_SOURCE_DIR}/s2/s2measures.cc"
     "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc"
     "${S2_SOURCE_DIR}/s2/s2metrics.cc"
-    "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc"
     "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc"
     "${S2_SOURCE_DIR}/s2/s2padded_cell.cc"
     "${S2_SOURCE_DIR}/s2/s2point_compression.cc"
@@ -80,10 +84,11 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2predicates.cc"
     "${S2_SOURCE_DIR}/s2/s2projections.cc"
     "${S2_SOURCE_DIR}/s2/s2r2rect.cc"
-    "${S2_SOURCE_DIR}/s2/s2region.cc"
-    "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc"
+    "${S2_SOURCE_DIR}/s2/s2random.cc"
     "${S2_SOURCE_DIR}/s2/s2region_coverer.cc"
     "${S2_SOURCE_DIR}/s2/s2region_intersection.cc"
+    "${S2_SOURCE_DIR}/s2/s2region_sharder.cc"
+    "${S2_SOURCE_DIR}/s2/s2region_term_indexer.cc"
     "${S2_SOURCE_DIR}/s2/s2region_union.cc"
     "${S2_SOURCE_DIR}/s2/s2shape_index.cc"
     "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
@@ -94,9 +99,12 @@ set(S2_SRCS
     "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
     "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
     "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
+    "${S2_SOURCE_DIR}/s2/s2shapeutil_count_vertices.cc"
     "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
+    "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_wrap.cc"
     "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
     "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
+    "${S2_SOURCE_DIR}/s2/s2testing.cc"
     "${S2_SOURCE_DIR}/s2/s2text_format.cc"
     "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
     "${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
@@ -140,6 +148,7 @@ target_link_libraries(_s2 PRIVATE
     absl::strings
     absl::type_traits
     absl::utility
+    absl::vlog_is_on
 )

 target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")

contrib/sysroot

@@ -1 +1 @@
-Subproject commit 39c4713334f9f156dbf508f548d510d9129a657c
+Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8

contrib/vectorscan

@@ -1 +1 @@
-Subproject commit 38431d111781843741a781a57a6381a527d900a4
+Subproject commit d29730e1cb9daaa66bda63426cdce83505d2c809


@@ -1,11 +1,8 @@
-# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan.
+# Vectorscan is drop-in replacement for Hyperscan.
 if ((ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) OR ARCH_AARCH64)
-    option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES})
+    option (ENABLE_VECTORSCAN "Enable vectorscan" ${ENABLE_LIBRARIES})
 endif()

-# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine
-
 if (NOT ENABLE_VECTORSCAN)
     message (STATUS "Not using vectorscan")
     return()
@@ -272,34 +269,24 @@ if (ARCH_AARCH64)
     )
 endif()

-# TODO
-# if (ARCH_PPC64LE)
-#     list(APPEND SRCS
-#         "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp"
-#     )
-# endif()
-
 add_library (_vectorscan ${SRCS})

-target_compile_options (_vectorscan PRIVATE
-    -fno-sanitize=undefined # assume the library takes care of itself
-    -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # options from original build system
-)
-
 # library has too much debug information
 if (OMIT_HEAVY_DEBUG_SYMBOLS)
     target_compile_options (_vectorscan PRIVATE -g0)
 endif()

-# Include version header manually generated by running the original build system
-target_include_directories (_vectorscan SYSTEM PRIVATE common)
+target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
+
+# Makes the version header visible. It was generated by running the native build system manually.
+# Please update whenever you update vectorscan.
+target_include_directories (_vectorscan SYSTEM PUBLIC common)

 # vectorscan inherited some patched in-source versions of boost headers to fix a bug in
 # boost 1.69. This bug has been solved long ago but vectorscan's source code still
 # points to the patched versions, so include it here.
 target_include_directories (_vectorscan SYSTEM PRIVATE "${LIBRARY_DIR}/include")

-target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
-
 # Include platform-specific config header generated by manually running the original build system
 # Please regenerate these files if you update vectorscan.


@@ -32,8 +32,12 @@
 /**
  * A version string to identify this release of Hyperscan.
  */
-#define HS_VERSION_STRING "5.4.7 2022-06-20"
+#define HS_VERSION_STRING "5.4.11 2024-07-04"

 #define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (7 << 8) | 0)

+#define HS_MAJOR 5
+#define HS_MINOR 4
+#define HS_PATCH 11
+
 #endif /* HS_VERSION_H_C6428FAF8E3713 */
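An aside on the constants, assuming the conventional Hyperscan packing of (major << 24) | (minor << 16) | (patch << 8): the untouched HS_VERSION_32BIT above still appears to encode 5.1.7, so the newly added HS_MAJOR/HS_MINOR/HS_PATCH macros are what actually track 5.4.11. A quick decode in Python:

v = (5 << 24) | (1 << 16) | (7 << 8) | 0
print(v >> 24, (v >> 16) & 0xFF, (v >> 8) & 0xFF)  # -> 5 1 7, not 5.4.11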


@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.6.1.4423"
+ARG VERSION="24.6.2.17"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""


@@ -111,6 +111,7 @@ fi
 mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
 [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
 [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
+[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output
 mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
 mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure


@@ -276,10 +276,7 @@ def parse_env_variables(
         if is_release_build(debug_build, package_type, sanitizer, coverage):
             cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
             result.append("WITH_PERFORMANCE=1")
-            if is_cross_arm:
-                cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1")
-            else:
-                result.append("BUILD_MUSL_KEEPER=1")
+            cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1")
     elif package_type == "fuzzers":
         cmake_flags.append("-DENABLE_FUZZING=1")
         cmake_flags.append("-DENABLE_PROTOBUF=1")

docker/reqgenerator.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# To run this script you must install docker and the pipdeptree python package
#

import subprocess
import os
import sys


def build_docker_deps(image_name, imagedir):
    cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt"""
    subprocess.check_call(cmd, shell=True)


def check_docker_file_install_with_pip(filepath):
    image_name = None
    with open(filepath, "r") as f:
        for line in f:
            if "docker build" in line:
                arr = line.split(" ")
                if len(arr) > 4:
                    image_name = arr[4]
            if "pip3 install" in line or "pip install" in line:
                return image_name, True
    return image_name, False


def process_affected_images(images_dir):
    for root, _dirs, files in os.walk(images_dir):
        for f in files:
            if f == "Dockerfile":
                docker_file_path = os.path.join(root, f)
                print("Checking image on path", docker_file_path)
                image_name, has_pip = check_docker_file_install_with_pip(
                    docker_file_path
                )
                if has_pip:
                    print("Find pip in", image_name)
                    try:
                        build_docker_deps(image_name, root)
                    except Exception as ex:
                        print(ex)
                else:
                    print("Pip not found in", docker_file_path)


process_affected_images(sys.argv[1])
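The script takes one argument, the directory tree of Docker images to scan (e.g. python3 docker/reqgenerator.py docker/test, path illustrative). The requirements.txt files introduced throughout this commit for the binary-builder, fasttest, integration, libfuzzer, performance, and bottle helper images are presumably its output: each is a pipdeptree freeze taken inside the corresponding image.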


@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.6.1.4423"
+ARG VERSION="24.6.2.17"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""


@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.6.1.4423"
+ARG VERSION="24.6.2.17"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 #docker-official-library:off


@@ -19,10 +19,7 @@ RUN apt-get update \
         odbcinst \
         psmisc \
         python3 \
-        python3-lxml \
         python3-pip \
-        python3-requests \
-        python3-termcolor \
         unixodbc \
         pv \
         jq \
@@ -31,7 +28,8 @@ RUN apt-get update \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

-RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
+COPY requirements.txt /
+RUN pip3 install --no-cache-dir -r /requirements.txt

 # This symlink is required by gcc to find the lld linker
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
 # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
 RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake

+# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path.
+# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792
+RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
+
 ARG CCACHE_VERSION=4.6.1
 RUN mkdir /tmp/ccache \
     && cd /tmp/ccache \


@@ -0,0 +1,41 @@
Jinja2==3.1.3
MarkupSafe==2.1.5
PyJWT==2.3.0
PyYAML==6.0.1
Pygments==2.11.2
SecretStorage==3.3.1
blinker==1.4
certifi==2020.6.20
chardet==4.0.0
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
pytz==2024.1
requests==2.32.3
scipy==1.12.0
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0


@@ -31,7 +31,8 @@ RUN apt-get update \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

-RUN pip3 install Jinja2
+COPY requirements.txt /
+RUN pip3 install --no-cache-dir -r /requirements.txt

 COPY * /


@@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0


@@ -33,7 +33,8 @@ RUN apt-get update \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

-RUN pip3 install pycurl
+COPY requirements.txt /
+RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip

 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH


@@ -0,0 +1,26 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
pycurl==7.45.3
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0


@@ -2,4 +2,5 @@
 # Helper docker container to run python bottle apps

 FROM python:3
-RUN python -m pip install bottle
+COPY requirements.txt /
+RUN python -m pip install --no-cache-dir -r requirements.txt


@@ -0,0 +1,6 @@
bottle==0.12.25
packaging==24.1
pip==23.2.1
pipdeptree==2.23.0
setuptools==69.0.3
wheel==0.42.0


@@ -26,7 +26,6 @@ RUN apt-get update \
         libicu-dev \
         bsdutils \
         curl \
-        python3-pika \
         liblua5.1-dev \
         luajit \
         libssl-dev \
@@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
 # kazoo 2.10.0 is broken
 # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
-RUN python3 -m pip install --no-cache-dir \
-    PyMySQL==1.1.0 \
-    asyncio==3.4.3 \
-    avro==1.10.2 \
-    azure-storage-blob==12.19.0 \
-    boto3==1.34.24 \
-    cassandra-driver==3.29.0 \
-    confluent-kafka==2.3.0 \
-    delta-spark==2.3.0 \
-    dict2xml==1.7.4 \
-    dicttoxml==1.7.16 \
-    docker==6.1.3 \
-    docker-compose==1.29.2 \
-    grpcio==1.60.0 \
-    grpcio-tools==1.60.0 \
-    kafka-python==2.0.2 \
-    lz4==4.3.3 \
-    minio==7.2.3 \
-    nats-py==2.6.0 \
-    protobuf==4.25.2 \
-    kazoo==2.9.0 \
-    psycopg2-binary==2.9.6 \
-    pyhdfs==0.3.1 \
-    pymongo==3.11.0 \
-    pyspark==3.3.2 \
-    pytest==7.4.4 \
-    pytest-order==1.0.0 \
-    pytest-random==0.2 \
-    pytest-repeat==0.9.3 \
-    pytest-timeout==2.2.0 \
-    pytest-xdist==3.5.0 \
-    pytest-reportlog==0.4.0 \
-    pytz==2023.3.post1 \
-    pyyaml==5.3.1 \
-    redis==5.0.1 \
-    requests-kerberos==0.14.0 \
-    tzlocal==2.1 \
-    retry==0.9.2 \
-    bs4==0.0.2 \
-    lxml==5.1.0 \
-    urllib3==2.0.7 \
-    jwcrypto==1.5.6
-
-# bs4, lxml are for cloud tests, do not delete
+COPY requirements.txt /
+RUN python3 -m pip install --no-cache-dir -r requirements.txt

 # Hudi supports only spark 3.3.*, not 3.4
 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \


@@ -0,0 +1,113 @@
PyHDFS==0.3.1
PyJWT==2.3.0
PyMySQL==1.1.0
PyNaCl==1.5.0
PyYAML==5.3.1
SecretStorage==3.3.1
argon2-cffi-bindings==21.2.0
argon2-cffi==23.1.0
async-timeout==4.0.3
asyncio==3.4.3
attrs==23.2.0
avro==1.10.2
azure-core==1.30.1
azure-storage-blob==12.19.0
bcrypt==4.1.3
beautifulsoup4==4.12.3
blinker==1.4
boto3==1.34.24
botocore==1.34.101
bs4==0.0.2
cassandra-driver==3.29.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
confluent-kafka==2.3.0
cryptography==3.4.8
dbus-python==1.2.18
decorator==5.1.1
delta-spark==2.3.0
dict2xml==1.7.4
dicttoxml==1.7.16
distro-info==1.1+ubuntu0.2
distro==1.7.0
docker-compose==1.29.2
docker==6.1.3
dockerpty==0.4.1
docopt==0.6.2
exceptiongroup==1.2.1
execnet==2.1.1
geomet==0.2.1.post1
grpcio-tools==1.60.0
grpcio==1.60.0
gssapi==1.8.3
httplib2==0.20.2
idna==3.7
importlib-metadata==4.6.4
iniconfig==2.0.0
isodate==0.6.1
jeepney==0.7.1
jmespath==1.0.1
jsonschema==3.2.0
jwcrypto==1.5.6
kafka-python==2.0.2
kazoo==2.9.0
keyring==23.5.0
krb5==0.5.1
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==5.1.0
lz4==4.3.3
minio==7.2.3
more-itertools==8.10.0
nats-py==2.6.0
oauthlib==3.2.0
packaging==24.0
paramiko==3.4.0
pika==1.2.0
pip==24.1.1
pipdeptree==2.23.0
pluggy==1.5.0
protobuf==4.25.2
psycopg2-binary==2.9.6
py4j==0.10.9.5
py==1.11.0
pycparser==2.22
pycryptodome==3.20.0
pymongo==3.11.0
pyparsing==2.4.7
pyrsistent==0.20.0
pyspark==3.3.2
pyspnego==0.10.2
pytest-order==1.0.0
pytest-random==0.2
pytest-repeat==0.9.3
pytest-reportlog==0.4.0
pytest-timeout==2.2.0
pytest-xdist==3.5.0
pytest==7.4.4
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
python-dotenv==0.21.1
pytz==2023.3.post1
redis==5.0.1
requests-kerberos==0.14.0
requests==2.31.0
retry==0.9.2
s3transfer==0.10.1
setuptools==59.6.0
simplejson==3.19.2
six==1.16.0
soupsieve==2.5
texttable==1.7.0
tomli==2.0.1
typing_extensions==4.11.0
tzlocal==2.1
unattended-upgrades==0.1
urllib3==2.0.7
wadllib==1.3.6
websocket-client==0.59.0
wheel==0.37.1
zipp==1.0.0

View File

@ -1,3 +1,4 @@
# docker build -t clickhouse/libfuzzer .
ARG FROM_TAG=latest ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG FROM clickhouse/test-base:$FROM_TAG
@ -29,7 +30,8 @@ RUN apt-get update \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install Jinja2 COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
COPY * / COPY * /

View File

@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -23,7 +23,6 @@ RUN apt-get update \
python3 \ python3 \
python3-dev \ python3-dev \
python3-pip \ python3-pip \
python3-setuptools \
rsync \ rsync \
tree \ tree \
tzdata \ tzdata \
@ -33,12 +32,14 @@ RUN apt-get update \
cargo \ cargo \
ripgrep \ ripgrep \
zstd \ zstd \
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
&& apt-get purge --yes python3-dev g++ \ && apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \ && apt-get autoremove --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY requirements.txt /
RUN pip3 --no-cache-dir install -r requirements.txt
COPY run.sh / COPY run.sh /
CMD ["bash", "/run.sh"] CMD ["bash", "/run.sh"]

View File

@ -0,0 +1,32 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
Pygments==2.11.2
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2023.4
PyYAML==6.0.1
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==2.1
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -18,11 +18,8 @@ RUN apt-get update --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \ COPY requirements.txt /
numpy \ RUN pip3 install --no-cache-dir -r /requirements.txt
pyodbc \
deepdiff \
sqlglot
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"

View File

@ -0,0 +1,30 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
deepdiff==7.0.1
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.4
oauthlib==3.2.0
ordered-set==4.1.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyodbc==5.1.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
sqlglot==23.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -14,9 +14,8 @@ RUN apt-get update --yes \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \ COPY requirements.txt /
pyyaml \ RUN pip3 install --no-cache-dir -r /requirements.txt
clickhouse-driver
ARG sqltest_repo="https://github.com/elliotchance/sqltest/" ARG sqltest_repo="https://github.com/elliotchance/sqltest/"

View File

@ -0,0 +1,29 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2024.1
PyYAML==6.0.1
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==5.2
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get update -y \ RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \ && env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \ apt-get install --yes --no-install-recommends \
python3-requests \
nodejs \ nodejs \
npm \ npm \
&& apt-get clean \ && apt-get clean \

View File

@ -213,6 +213,10 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--s3-storage') ADDITIONAL_OPTIONS+=('--s3-storage')
fi fi
if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--azure-blob-storage')
fi
if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then
ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') ADDITIONAL_OPTIONS+=('--db-engine=Ordinary')
fi fi

View File

@ -25,10 +25,7 @@ RUN apt-get update -y \
openssl \ openssl \
postgresql-client \ postgresql-client \
python3 \ python3 \
python3-lxml \
python3-pip \ python3-pip \
python3-requests \
python3-termcolor \
qemu-user-static \ qemu-user-static \
sqlite3 \ sqlite3 \
sudo \ sudo \
@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR
&& unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
&& rm protoc-${PROTOC_VERSION}-linux-x86_64.zip && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
RUN mkdir -p /tmp/clickhouse-odbc-tmp \ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& cd /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \

View File

@ -0,0 +1,51 @@
awscli==1.22.34
blinker==1.4
botocore==1.23.34
certifi==2020.6.20
chardet==4.0.0
colorama==0.4.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
docutils==0.17.1
gyp==0.1
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.3
jmespath==0.10.0
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
MarkupSafe==2.1.5
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyarrow==15.0.0
pyasn1==0.4.8
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.8.1
pytz==2024.1
PyYAML==6.0.1
requests==2.32.3
roman==3.3
rsa==4.8
s3transfer==0.5.0
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -207,7 +207,7 @@ function run_tests()
if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then
# to disable the same tests # to disable the same tests
ADDITIONAL_OPTIONS+=('--s3-storage') ADDITIONAL_OPTIONS+=('--azure-blob-storage')
# azurite is slow, but with these two settings it can be super slow # azurite is slow, but with these two settings it can be super slow
ADDITIONAL_OPTIONS+=('--no-random-settings') ADDITIONAL_OPTIONS+=('--no-random-settings')
ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings')

View File

@ -110,6 +110,15 @@ start_server
clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "SHOW TABLES FROM test"
if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
TEMP_POLICY="s3_cache"
elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
TEMP_POLICY="azure_cache"
else
TEMP_POLICY="default"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String,
@ -135,7 +144,7 @@ clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnabl
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String,
@ -161,7 +170,7 @@ clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable U
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8,
VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32,
Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String,
@ -195,7 +204,7 @@ clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDat
Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32,
DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16))
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='$TEMP_POLICY'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
@ -211,19 +220,29 @@ clickhouse-client --query "SYSTEM STOP THREAD FUZZER"
stop_server stop_server
# Let's enable S3 storage by default # Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
export RANDOMIZE_OBJECT_KEY_TYPE=1 export RANDOMIZE_OBJECT_KEY_TYPE=1
export ZOOKEEPER_FAULT_INJECTION=1 export ZOOKEEPER_FAULT_INJECTION=1
export THREAD_POOL_FAULT_INJECTION=1 export THREAD_POOL_FAULT_INJECTION=1
configure configure
# But we still need default disk because some tables loaded only into it if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ # But we still need default disk because some tables loaded only into it
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml \
| sed "s|<main><disk>azure</disk></main>|<main><disk>azure</disk></main><default><disk>default</disk></default>|" \
> /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/azure_storage_policy_by_default.xml
fi
sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
| sed "s|<level>trace</level>|<level>test</level>|" \ | sed "s|<level>trace</level>|<level>test</level>|" \

View File

@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# python-magic is the same version as in Ubuntu 22.04 # python-magic is the same version as in Ubuntu 22.04
RUN pip3 install \ COPY requirements.txt /
PyGithub \ RUN pip3 install --no-cache-dir -r requirements.txt
black==23.12.0 \
boto3 \
codespell==2.2.1 \
mypy==1.8.0 \
pylint==3.1.0 \
python-magic==0.4.24 \
flake8==4.0.1 \
requests \
thefuzz \
tqdm==4.66.4 \
types-requests \
unidiff \
jwt \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8 ENV LC_ALL en_US.UTF-8

View File

@ -0,0 +1,58 @@
aiohttp==3.9.5
aiosignal==1.3.1
astroid==3.1.0
async-timeout==4.0.3
attrs==23.2.0
black==23.12.0
boto3==1.34.131
botocore==1.34.131
certifi==2024.6.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
codespell==2.2.1
cryptography==42.0.8
Deprecated==1.2.14
dill==0.3.8
flake8==4.0.1
frozenlist==1.4.1
idna==3.7
isort==5.13.2
jmespath==1.0.1
jwt==1.3.1
mccabe==0.6.1
multidict==6.0.5
mypy==1.8.0
mypy-extensions==1.0.0
packaging==24.1
pathspec==0.9.0
pip==24.1.1
pipdeptree==2.23.0
platformdirs==4.2.2
pycodestyle==2.8.0
pycparser==2.22
pyflakes==2.4.0
PyGithub==2.3.0
PyJWT==2.8.0
pylint==3.1.0
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
python-magic==0.4.24
PyYAML==6.0.1
rapidfuzz==3.9.3
requests==2.32.3
s3transfer==0.10.1
setuptools==59.6.0
six==1.16.0
thefuzz==0.22.1
tomli==2.0.1
tomlkit==0.12.5
tqdm==4.66.4
types-requests==2.32.0.20240622
typing_extensions==4.12.2
unidiff==0.7.5
urllib3==2.2.2
wheel==0.37.1
wrapt==1.16.0
yamllint==1.26.3
yarl==1.9.4

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478)
#### New Feature
* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Improvement
* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, the `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -993,11 +993,11 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
- `TDigest` - `TDigest`
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch. [TDigest](https://github.com/tdunning/t-digest) sketches, which allow computing approximate percentiles (e.g. the 90th percentile) for numeric columns.
- `Uniq` - `Uniq`
Estimate the number of distinct values of a column by HyperLogLog. [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches, which estimate how many distinct values a column contains (see the example below).
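As a rough illustration of how these statistics are declared — a hedged sketch, not taken from this diff: `tab` and its columns are hypothetical, and on releases of this vintage the feature is experimental, so the exact clause spelling and its gating setting may differ.

```sql
-- Hedged sketch: declaring both statistics types on one column.
-- The feature is experimental here and may need to be enabled first,
-- e.g. via an allow_experimental_statistics-style setting.
CREATE TABLE tab
(
    key UInt64,
    value Float64 STATISTICS(TDigest, Uniq)
)
ENGINE = MergeTree
ORDER BY key;
```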
## Column-level Settings {#column-level-settings} ## Column-level Settings {#column-level-settings}

View File

@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
- `password` for the file on disk - `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `use_same_s3_credentials_for_base_backup`: whether the base backup to S3 should inherit credentials from the query (see the example after this list). Only works with `S3`.
- `structure_only`: if enabled, allows backing up or restoring only the CREATE statements without the data of tables - `structure_only`: if enabled, allows backing up or restoring only the CREATE statements without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
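A hedged sketch of how the new `use_same_s3_credentials_for_base_backup` setting composes with `base_backup`; the bucket URL and archive names below are placeholders, and the destination credentials are assumed to come from the query or environment:

```sql
-- Hedged sketch: an incremental backup whose base backup reuses
-- the S3 credentials of the outer query. URLs are placeholders.
BACKUP TABLE test.table
    TO S3('https://bucket.s3.amazonaws.com/backups/2.zip')
    SETTINGS base_backup = S3('https://bucket.s3.amazonaws.com/backups/1.zip'),
             use_same_s3_credentials_for_base_backup = 1;
```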

View File

@ -974,6 +974,13 @@ Default value: false
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization}
Enables compact mode for binary serialization of discriminators in the Variant data type.
This mode uses significantly less memory to store discriminators in parts when there is mostly one variant or many NULL values.
Default value: true
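Since this is a table-level MergeTree setting, it can be overridden per table. A minimal sketch, assuming a hypothetical table; note that `Variant` itself is experimental at this point and may need `allow_experimental_variant_type` enabled:

```sql
-- Hedged sketch: opting one table out of compact discriminator
-- serialization. Variant may require enabling an experimental setting.
CREATE TABLE tab
(
    v Variant(String, UInt64)
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS use_compact_variant_discriminators_serialization = 0;
```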
## merge_workload ## merge_workload
Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.

View File

@ -2536,7 +2536,7 @@ Possible values:
- 0 — Optimization disabled. - 0 — Optimization disabled.
- 1 — Optimization enabled. - 1 — Optimization enabled.
Default value: `0`. Default value: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query} ## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -0,0 +1,37 @@
---
slug: /en/sql-reference/aggregate-functions/reference/aggthrow
sidebar_position: 101
---
# aggThrow
This function can be used to test exception safety. It throws an exception on creation with the specified probability.
**Syntax**
```sql
aggThrow(throw_prob)
```
**Arguments**
- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md).
**Returned value**
- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`.
**Example**
Query:
```sql
SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even;
```
Result:
```response
Received exception:
Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW)
```

View File

@ -43,6 +43,7 @@ Standard aggregate functions:
ClickHouse-specific aggregate functions: ClickHouse-specific aggregate functions:
- [aggThrow](../reference/aggthrow.md)
- [analysisOfVariance](../reference/analysis_of_variance.md) - [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md) - [any](../reference/any_respect_nulls.md)
- [anyHeavy](../reference/anyheavy.md) - [anyHeavy](../reference/anyheavy.md)

View File

@ -83,7 +83,57 @@ Result:
``` ```
## makeDate32 ## makeDate32
Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, a month and a day (or, optionally, a year and a day of year).
**Syntax**
```sql
makeDate32(year, [month,] day)
```
**Arguments**
- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
:::note
If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`.
:::
**Returned values**
- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md).
**Examples**
Create a date from a year, month, and day:
Query:
```sql
SELECT makeDate32(2024, 1, 1);
```
Result:
```response
2024-01-01
```
Create a date from a year and day of year:
Query:
``` sql
SELECT makeDate32(2024, 100);
```
Result:
```response
2024-04-09
```
## makeDateTime ## makeDateTime
@ -125,12 +175,38 @@ Result:
## makeDateTime64 ## makeDateTime64
Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components (year, month, day, hour, minute, second), with an optional sub-second fraction, precision and timezone.
**Syntax** **Syntax**
```sql
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
```
**Arguments**
- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `fraction` — Optional fraction of a second. [Integer](../../sql-reference/data-types/int-uint.md).
- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md).
- `timezone` — Optional [timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value.
**Returned value**
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
``` sql ``` sql
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5);
```
```response
┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐
│ 2023-05-15 10:30:45.00779 │
└─────────────────────────────────────────────────┘
``` ```
## timestamp ## timestamp

View File

@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
fqdn(); fqdn();
``` ```
This function is case-insensitive. Aliases: `fullHostName`, `FQDN`.
**Returned value** **Returned value**

View File

@ -6,41 +6,119 @@ sidebar_label: Time Window
# Time Window Functions # Time Window Functions
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below:
## tumble ## tumble
A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).
**Syntax**
``` sql ``` sql
tumble(time_attr, interval [, timezone]) tumble(time_attr, interval [, timezone])
``` ```
**Arguments** **Arguments**
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. - `time_attr` Date and time. [DateTime](../data-types/datetime.md).
- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. - `interval` Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
**Returned values** **Returned values**
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. - The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
**Example** **Example**
Query: Query:
``` sql ``` sql
SELECT tumble(now(), toIntervalDay('1')) SELECT tumble(now(), toIntervalDay('1'));
``` ```
Result: Result:
``` text ``` text
┌─tumble(now(), toIntervalDay('1'))─────────────┐ ┌─tumble(now(), toIntervalDay('1'))─────────────┐
['2020-01-01 00:00:00','2020-01-02 00:00:00'] ('2024-07-04 00:00:00','2024-07-05 00:00:00')
└───────────────────────────────────────────────┘ └───────────────────────────────────────────────┘
``` ```
## tumbleStart
Returns the inclusive lower bound of the corresponding [tumbling window](#tumble).
**Syntax**
``` sql
tumbleStart(time_attr, interval [, timezone]);
```
**Arguments**
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
**Returned values**
- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT tumbleStart(now(), toIntervalDay('1'));
```
Result:
```response
┌─tumbleStart(now(), toIntervalDay('1'))─┐
│ 2024-07-04 00:00:00 │
└────────────────────────────────────────┘
```
## tumbleEnd
Returns the exclusive upper bound of the corresponding [tumbling window](#tumble).
**Syntax**
``` sql
tumbleEnd(time_attr, interval [, timezone]);
```
**Arguments**
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
**Returned values**
- The exclusive upper bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT tumbleEnd(now(), toIntervalDay('1'));
```
Result:
```response
┌─tumbleEnd(now(), toIntervalDay('1'))─┐
│ 2024-07-05 00:00:00 │
└──────────────────────────────────────┘
```
## hop ## hop
A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.
@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone])
**Arguments** **Arguments**
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. - `time_attr` Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
**Returned values** **Returned values**
- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). - The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::
**Example** **Example**
Query: Query:
``` sql ``` sql
SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
``` ```
Result: Result:
``` text ``` text
┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ ┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ │ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │
└───────────────────────────────────────────────────────────┘ └────────────────────────────────────────────────────┘
```
## tumbleStart
Returns the inclusive lower bound of the corresponding tumbling window.
``` sql
tumbleStart(bounds_tuple);
tumbleStart(time_attr, interval [, timezone]);
```
## tumbleEnd
Returns the exclusive upper bound of the corresponding tumbling window.
``` sql
tumbleEnd(bounds_tuple);
tumbleEnd(time_attr, interval [, timezone]);
``` ```
## hopStart ## hopStart
Returns the inclusive lower bound of the corresponding hopping window. Returns the inclusive lower bound of the corresponding [hopping window](#hop).
**Syntax**
``` sql ``` sql
hopStart(bounds_tuple);
hopStart(time_attr, hop_interval, window_interval [, timezone]); hopStart(time_attr, hop_interval, window_interval [, timezone]);
``` ```
**Arguments**
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
**Returned values**
- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::
**Example**
Query:
``` sql
SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```
Result:
``` text
┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ 2024-07-03 00:00:00 │
└─────────────────────────────────────────────────────────┘
```
## hopEnd ## hopEnd
Returns the exclusive upper bound of the corresponding hopping window. Returns the exclusive upper bound of the corresponding [hopping window](#hop).
**Syntax**
``` sql ``` sql
hopEnd(bounds_tuple);
hopEnd(time_attr, hop_interval, window_interval [, timezone]); hopEnd(time_attr, hop_interval, window_interval [, timezone]);
```
**Arguments**
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
**Returned values**
- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::
**Example**
Query:
``` sql
SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```
Result:
``` text
┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ 2024-07-05 00:00:00 │
└───────────────────────────────────────────────────────┘
``` ```
## Related content ## Related content

View File

@ -0,0 +1,36 @@
---
slug: /en/sql-reference/table-functions/fuzzQuery
sidebar_position: 75
sidebar_label: fuzzQuery
---
# fuzzQuery
Perturbs the given query string with random variations.
``` sql
fuzzQuery(query[, max_query_length[, random_seed]])
```
**Arguments**
- `query` (String) - The source query to fuzz.
- `max_query_length` (UInt64) - The maximum length the query can reach during fuzzing.
- `random_seed` (UInt64) - A random seed for producing stable results.
**Returned value**
A table object with a single column containing perturbed query strings.
## Usage Example
``` sql
SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
```
```response
┌─query──────────────────────────────────────────────────────────┐
1. │ SELECT 'a' AS key GROUP BY key │
2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
└────────────────────────────────────────────────────────────────┘
```

View File

@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `GROUPS` frame | ❌ | | `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ | | `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `percent_rank()` | ✅ Computes the relative standing of a value within a partition. It replaces the more verbose and computationally intensive manual SQL calculation `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)` (see the sketch after this table). |
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | | `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
| ntile(buckets) | ✅ <br/> Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. | | ntile(buckets) | ✅ <br/> Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
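A hedged sketch of the `percent_rank()` row above (`t`, `x` and `y` are hypothetical):

```sql
-- Hedged sketch: relative standing of y within each partition of x,
-- replacing the manual rank()/count() calculation quoted in the table.
SELECT
    x,
    y,
    percent_rank() OVER (PARTITION BY x ORDER BY y) AS pr
FROM t;
```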

View File

@ -2077,7 +2077,7 @@ SELECT * FROM test_table
- 0 — Optimization disabled. - 0 — Optimization disabled.
- 1 — Optimization enabled. - 1 — Optimization enabled.
Default value: `0`. Default value: `1`.
## optimize_trivial_count_query {#optimize-trivial-count-query} ## optimize_trivial_count_query {#optimize-trivial-count-query}

View File

@ -66,18 +66,18 @@ else()
message(STATUS "Library bridge mode: OFF") message(STATUS "Library bridge mode: OFF")
endif() endif()
if (ENABLE_CLICKHOUSE_KEEPER)
message(STATUS "ClickHouse keeper mode: ON")
else()
message(STATUS "ClickHouse keeper mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
message(STATUS "ClickHouse keeper-converter mode: ON") message(STATUS "ClickHouse keeper-converter mode: ON")
else() else()
message(STATUS "ClickHouse keeper-converter mode: OFF") message(STATUS "ClickHouse keeper-converter mode: OFF")
endif() endif()
if (ENABLE_CLICKHOUSE_KEEPER)
message(STATUS "ClickHouse Keeper: ON")
else()
message(STATUS "ClickHouse Keeper: OFF")
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
message(STATUS "ClickHouse keeper-client mode: ON") message(STATUS "ClickHouse keeper-client mode: ON")
else() else()
@ -131,10 +131,6 @@ add_subdirectory (static-files-disk-uploader)
add_subdirectory (su) add_subdirectory (su)
add_subdirectory (disks) add_subdirectory (disks)
if (ENABLE_CLICKHOUSE_KEEPER)
add_subdirectory (keeper)
endif()
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_subdirectory (keeper-converter) add_subdirectory (keeper-converter)
endif() endif()
@ -143,6 +139,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
add_subdirectory (keeper-client) add_subdirectory (keeper-client)
endif() endif()
if (ENABLE_CLICKHOUSE_KEEPER)
add_subdirectory (keeper)
endif()
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge) add_subdirectory (odbc-bridge)
endif () endif ()

View File

@ -9,7 +9,10 @@ namespace DB
class Client : public ClientBase class Client : public ClientBase
{ {
public: public:
Client() = default; Client()
{
fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr);
}
void initialize(Poco::Util::Application & self) override; void initialize(Poco::Util::Application & self) override;

View File

@ -1,4 +1,5 @@
set(CLICKHOUSE_KEEPER_SOURCES set(CLICKHOUSE_KEEPER_SOURCES
keeper_main.cpp
Keeper.cpp Keeper.cpp
) )
@ -8,6 +9,9 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_common_io clickhouse_common_io
clickhouse_common_zookeeper clickhouse_common_zookeeper
daemon daemon
clickhouse-keeper-converter-lib
clickhouse-keeper-client-lib
clickhouse_functions
dbms dbms
) )
@ -17,199 +21,11 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke
if (BUILD_STANDALONE_KEEPER) if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources # Straight list of all required sources
set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_SOURCES})
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperFeatureFlags.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperContext.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp target_link_libraries(clickhouse-keeper PUBLIC ${CLICKHOUSE_KEEPER_LINK})
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BackgroundSchedulePool.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CloudPlacementInfo.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/registerDisks.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IDisk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskSelector.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskLocalCheckThread.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/LocalDirectorySyncGuard.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/TemporaryFileOnDisk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/loadLocalDiskConfig.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getThreadPoolReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolRemoteFSReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ThreadPoolReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp
${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Context.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/Settings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Standalone/ThreadStatusExt.cpp
Keeper.cpp
clickhouse-keeper.cpp
)
# List of sources for the clickhouse-keeper client
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp
)
endif()
clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})
# Remove some redundant dependencies
target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_KEEPER_STANDALONE_BUILD)
target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG)
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim)
target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim)
endif()
target_link_libraries(clickhouse-keeper
PRIVATE
ch_contrib::abseil_swiss_tables
ch_contrib::nuraft
ch_contrib::lz4
ch_contrib::zstd
ch_contrib::cityhash
ch_contrib::jemalloc
common ch_contrib::double_conversion
ch_contrib::dragonbox_to_chars
pcg_random
ch_contrib::pdqsort
ch_contrib::miniselect
clickhouse_common_config_no_zookeeper_log
loggers_no_text_log
clickhouse_common_io
clickhouse_parsers # Otherwise compression will not be built. FIXME.
)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper)
else()


@ -27,6 +27,8 @@
#include <sys/stat.h>
#include <pwd.h>
#include <Common/Jemalloc.h>
#include <Interpreters/Context.h>
#include <Coordination/FourLetterCommand.h>
@ -75,16 +77,6 @@ int mainEntryClickHouseKeeper(int argc, char ** argv)
}
}
#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD
// Weak symbols don't work correctly on Darwin
// so we have a stub implementation to avoid linker errors
void collectCrashLog(
Int32, UInt64, const String &, const StackTrace &)
{}
#endif
namespace DB
{
@ -277,6 +269,9 @@ HTTPContextPtr httpContext()
int Keeper::main(const std::vector<std::string> & /*args*/)
try
{
#if USE_JEMALLOC
setJemallocBackgroundThreads(true);
#endif
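/// setJemallocBackgroundThreads() comes from the <Common/Jemalloc.h> include added above.
/// Presumably (an assumption, not verified against this commit) it flips jemalloc's
/// documented "background_thread" knob, roughly:
///
///     bool enabled = true;
///     mallctl("background_thread", nullptr, nullptr, &enabled, sizeof(enabled));
///
/// so that dirty pages are purged by background threads instead of application threads.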
Poco::Logger * log = &logger();
UseSSL use_ssl;


@ -1,30 +0,0 @@
#include <Common/StringUtils.h>
#include "config_tools.h"
int mainEntryClickHouseKeeper(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
int main(int argc_, char ** argv_)
{
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
if (argc_ >= 2)
{
/// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK
if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0)
{
argv_[1] = argv_[0];
return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1);
}
}
if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client")))
return mainEntryClickHouseKeeperClient(argc_, argv_);
#endif
return mainEntryClickHouseKeeper(argc_, argv_);
}


@ -0,0 +1,189 @@
#include <unistd.h>
#include <fcntl.h>
#include <new>
#include <iostream>
#include <vector>
#include <string_view>
#include <utility> /// pair
#include <fmt/format.h>
#include "config.h"
#include "config_tools.h"
#include <Common/EnvironmentChecks.h>
#include <Common/Coverage.h>
#include <Common/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/IO.h>
#include <base/phdr_cache.h>
#include <base/coverage.h>
int mainEntryClickHouseKeeper(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
namespace
{
using MainFunc = int (*)(int, char**);
/// Add an item here to register a new application
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
// keeper
{"keeper", mainEntryClickHouseKeeper},
#if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
{"converter", mainEntryClickHouseKeeperConverter},
{"keeper-converter", mainEntryClickHouseKeeperConverter},
#endif
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
{"client", mainEntryClickHouseKeeperClient},
{"keeper-client", mainEntryClickHouseKeeperClient},
#endif
};
int printHelp(int, char **)
{
std::cerr << "Use one of the following commands:" << std::endl;
for (auto & application : clickhouse_applications)
std::cerr << "clickhouse " << application.first << " [args] " << std::endl;
return -1;
}
}
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)
{
/// Use app if the first arg 'app' is passed (the arg should be quietly removed)
if (argv.size() >= 2)
{
auto first_arg = argv.begin() + 1;
/// 'clickhouse --client ...' and 'clickhouse client ...' are Ok
if (*first_arg == app_suffix
|| (std::string_view(*first_arg).starts_with("--") && std::string_view(*first_arg).substr(2) == app_suffix))
{
argv.erase(first_arg);
return true;
}
}
/// The "keeper" suffix is the default; it is used if no other app is detected
if (app_suffix == "keeper")
return false;
/// Use the app if the clickhouse binary is run through a symbolic link named clickhouse-app
std::string app_name = "clickhouse-" + std::string(app_suffix);
return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name));
}
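/// A minimal sketch (hypothetical helper, not part of this file) showing the contract
/// above: after a match the selector is erased from argv, so the selected entry point
/// sees the arguments as if it had been invoked directly. For example,
/// {"clickhouse-keeper", "client", "--port", "9181"} is passed on as
/// {"clickhouse-keeper", "--port", "9181"}.
[[maybe_unused]] static int dispatchSketch(std::vector<char *> argv)
{
    for (auto & application : clickhouse_applications)
        if (isClickhouseApp(application.first, argv))
            return application.second(static_cast<int>(argv.size()), argv.data());
    /// "keeper" itself never matches (see above), so it serves as the fallback.
    return mainEntryClickHouseKeeper(static_cast<int>(argv.size()), argv.data());
}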
/// Don't allow dlopen in the main ClickHouse binary, because it is harmful and insecure.
/// We don't use it. But it can be used by some libraries for implementation of "plugins".
/// We absolutely discourage the ancient technique of loading
/// 3rd-party uncontrolled dangerous libraries into the process address space,
/// because it is insane.
#if !defined(USE_MUSL)
extern "C"
{
void * dlopen(const char *, int)
{
return nullptr;
}
void * dlmopen(long, const char *, int) // NOLINT
{
return nullptr;
}
int dlclose(void *)
{
return 0;
}
const char * dlerror()
{
return "ClickHouse does not allow dynamic library loading";
}
}
#endif
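/// A minimal sketch (hypothetical caller, not part of this file) of what the stubs
/// above produce: dlopen() always fails and dlerror() explains why.
#if !defined(USE_MUSL)
#include <dlfcn.h> /// only for RTLD_NOW; the symbols themselves resolve to the stubs above
[[maybe_unused]] static void dlopenStubSketch()
{
    if (dlopen("libplugin.so", RTLD_NOW) == nullptr) /// always true: the stub returns nullptr
        fmt::print(stderr, "{}\n", dlerror()); /// "ClickHouse does not allow dynamic library loading"
}
#endif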
/// Prevent messages from JeMalloc in the release build.
/// Some of these messages are non-actionable for the users, such as:
/// <jemalloc>: Number of CPUs detected is not deterministic. Per-CPU arena disabled.
#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER)
extern "C" void (*malloc_message)(void *, const char *s);
__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; }
#endif
/// This allows implementing an assert that forbids initialization of a class in static constructors.
/// Usage:
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
bool inside_main = false;
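/// A compilable sketch of the pattern described above (the struct name is hypothetical,
/// not part of this file): constructing such an object from a static initializer fires
/// the assert, because inside_main is still false until main() starts.
#include <cassert>
struct OnlyConstructibleInsideMain
{
    OnlyConstructibleInsideMain() { assert(inside_main); }
};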
int main(int argc_, char ** argv_)
{
inside_main = true;
SCOPE_EXIT({ inside_main = false; });
/// PHDR cache is required for query profiler to work reliably
/// It also speeds up exception handling, but exceptions from dynamically loaded libraries (dlopen)
/// will work only after an additional call of this function.
/// Note: we forbid dlopen in our code.
updatePHDRCache();
#if !defined(USE_MUSL)
checkHarmfulEnvironmentVariables(argv_);
#endif
/// This is used for testing. For example,
/// clickhouse-local should be able to run a simple query without throw/catch.
if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe)
DB::terminate_on_any_exception = true;
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);
std::vector<char *> argv(argv_, argv_ + argc_);
/// Print a basic help if nothing was matched
MainFunc main_func = mainEntryClickHouseKeeper;
if (isClickhouseApp("help", argv))
{
main_func = printHelp;
}
else
{
for (auto & application : clickhouse_applications)
{
if (isClickhouseApp(application.first, argv))
{
main_func = application.second;
break;
}
}
}
int exit_code = main_func(static_cast<int>(argv.size()), argv.data());
#if defined(SANITIZE_COVERAGE)
dumpCoverage();
#endif
return exit_code;
}


@ -1,5 +1,3 @@
#include <csignal>
#include <csetjmp>
#include <unistd.h>
#include <fcntl.h>
@ -7,7 +5,6 @@
#include <iostream>
#include <vector>
#include <string>
#include <tuple>
#include <string_view>
#include <utility> /// pair
@ -16,6 +13,8 @@
#include "config.h" #include "config.h"
#include "config_tools.h" #include "config_tools.h"
#include <Common/EnvironmentChecks.h>
#include <Common/Coverage.h>
#include <Common/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/IO.h>
@ -119,268 +118,6 @@ std::pair<std::string_view, std::string_view> clickhouse_short_names[] =
{"chc", "client"}, {"chc", "client"},
}; };
enum class InstructionFail : uint8_t
{
NONE = 0,
SSE3 = 1,
SSSE3 = 2,
SSE4_1 = 3,
SSE4_2 = 4,
POPCNT = 5,
AVX = 6,
AVX2 = 7,
AVX512 = 8
};
auto instructionFailToString(InstructionFail fail)
{
switch (fail)
{
#define ret(x) return std::make_tuple(STDERR_FILENO, x, sizeof(x) - 1)
case InstructionFail::NONE:
ret("NONE");
case InstructionFail::SSE3:
ret("SSE3");
case InstructionFail::SSSE3:
ret("SSSE3");
case InstructionFail::SSE4_1:
ret("SSE4.1");
case InstructionFail::SSE4_2:
ret("SSE4.2");
case InstructionFail::POPCNT:
ret("POPCNT");
case InstructionFail::AVX:
ret("AVX");
case InstructionFail::AVX2:
ret("AVX2");
case InstructionFail::AVX512:
ret("AVX512");
#undef ret
}
}
sigjmp_buf jmpbuf;
[[noreturn]] void sigIllCheckHandler(int, siginfo_t *, void *)
{
siglongjmp(jmpbuf, 1);
}
/// Check if the necessary SSE extensions are available by trying to execute some SSE instructions.
/// If an instruction is unavailable, SIGILL will be sent by the kernel.
void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
{
#if defined(__SSE3__)
fail = InstructionFail::SSE3;
__asm__ volatile ("addsubpd %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSSE3__)
fail = InstructionFail::SSSE3;
__asm__ volatile ("pabsw %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSE4_1__)
fail = InstructionFail::SSE4_1;
__asm__ volatile ("pmaxud %%xmm0, %%xmm0" : : : "xmm0");
#endif
#if defined(__SSE4_2__)
fail = InstructionFail::SSE4_2;
__asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0");
#endif
/// Defined by -msse4.2
#if defined(__POPCNT__)
fail = InstructionFail::POPCNT;
{
uint64_t a = 0;
uint64_t b = 0;
__asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :);
}
#endif
#if defined(__AVX__)
fail = InstructionFail::AVX;
__asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0");
#endif
#if defined(__AVX2__)
fail = InstructionFail::AVX2;
__asm__ volatile ("vpabsw %%ymm0, %%ymm0" : : : "ymm0");
#endif
#if defined(__AVX512__)
fail = InstructionFail::AVX512;
__asm__ volatile ("vpabsw %%zmm0, %%zmm0" : : : "zmm0");
#endif
fail = InstructionFail::NONE;
}
/// Macros to avoid using strlen(), since it may fail if SSE is not supported.
#define writeError(data) do \
{ \
static_assert(__builtin_constant_p(data)); \
if (!writeRetry(STDERR_FILENO, data, sizeof(data) - 1)) \
_Exit(1); \
} while (false)
/// Check the availability of SSE and other instruction sets. Calls exit on failure.
/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
void checkRequiredInstructions()
{
struct sigaction sa{};
struct sigaction sa_old{};
sa.sa_sigaction = sigIllCheckHandler;
sa.sa_flags = SA_SIGINFO;
auto signal = SIGILL;
if (sigemptyset(&sa.sa_mask) != 0
|| sigaddset(&sa.sa_mask, signal) != 0
|| sigaction(signal, &sa, &sa_old) != 0)
{
/// You may wonder about strlen.
/// A typical implementation of strlen uses SSE4.2 or AVX2,
/// but this is not the case here, because it's a compiler builtin and is executed at compile time.
writeError("Can not set signal handler\n");
_Exit(1);
}
volatile InstructionFail fail = InstructionFail::NONE;
if (sigsetjmp(jmpbuf, 1))
{
writeError("Instruction check fail. The CPU does not support ");
if (!std::apply(writeRetry, instructionFailToString(fail)))
_Exit(1);
writeError(" instruction set.\n");
_Exit(1);
}
checkRequiredInstructionsImpl(fail);
if (sigaction(signal, &sa_old, nullptr))
{
writeError("Can not set signal handler\n");
_Exit(1);
}
}
struct Checker
{
Checker()
{
checkRequiredInstructions();
}
} checker
#ifndef OS_DARWIN
__attribute__((init_priority(101))) /// Run before other static initializers.
#endif
;
#if !defined(USE_MUSL)
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
void checkHarmfulEnvironmentVariables(char ** argv)
{
std::initializer_list<const char *> harmful_env_variables = {
/// The list is a selection from "man ld-linux".
"LD_PRELOAD",
"LD_LIBRARY_PATH",
"LD_ORIGIN_PATH",
"LD_AUDIT",
"LD_DYNAMIC_WEAK",
/// The list is a selection from "man dyld" (osx).
"DYLD_LIBRARY_PATH",
"DYLD_FALLBACK_LIBRARY_PATH",
"DYLD_VERSIONED_LIBRARY_PATH",
"DYLD_INSERT_LIBRARIES",
};
bool require_reexec = false;
for (const auto * var : harmful_env_variables)
{
if (const char * value = getenv(var); value && value[0]) // NOLINT(concurrency-mt-unsafe)
{
/// NOTE: setenv() is used over unsetenv() since unsetenv() is marked as harmful
if (setenv(var, "", true)) // NOLINT(concurrency-mt-unsafe) // this is safe if not called concurrently
{
fmt::print(stderr, "Cannot override {} environment variable", var);
_exit(1);
}
require_reexec = true;
}
}
if (require_reexec)
{
/// Use execvp() over execv() to search in PATH.
///
/// This should be safe, since:
/// - if argv[0] is relative path - it is OK
/// - if argv[0] has only a basename, then it will search in PATH, like a shell does.
///
/// Also note that the search in PATH is done because there is no easy and
/// portable way to get the absolute path of argv[0].
/// - on linux there is /proc/self/exe and AT_EXECFN
/// - but on other OSes there is no such thing (especially on OSX).
///
/// And since static linking will be done someday anyway,
/// let's not pollute the code base with special cases.
int error = execvp(argv[0], argv);
_exit(error);
}
}
#endif
#if defined(SANITIZE_COVERAGE)
__attribute__((no_sanitize("coverage")))
void dumpCoverage()
{
/// A user can request to dump the coverage information into files at exit.
/// This is useful for non-server applications such as clickhouse-format or clickhouse-client,
/// that cannot introspect it with SQL functions at runtime.
/// The CLICKHOUSE_WRITE_COVERAGE environment variable defines a prefix for a filename 'prefix.pid'
/// containing the list of addresses of covered code.
/// The format is even simpler than Clang's "sancov": an array of 64-bit addresses, native byte order, no header.
if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe)
{
auto dump = [](const std::string & name, auto span)
{
/// Write only non-zeros.
std::vector<uintptr_t> data;
data.reserve(span.size());
for (auto addr : span)
if (addr)
data.push_back(addr);
int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400);
if (-1 == fd)
{
writeError("Cannot open a file to write the coverage data\n");
}
else
{
if (!writeRetry(fd, reinterpret_cast<const char *>(data.data()), data.size() * sizeof(data[0])))
writeError("Cannot write the coverage data to a file\n");
if (0 != ::close(fd))
writeError("Cannot close the file with coverage data\n");
}
};
dump(fmt::format("{}.{}", coverage_filename_prefix, getpid()), getCumulativeCoverage());
}
}
#endif
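/// A minimal sketch of a reader for the format described above (hypothetical helper,
/// not part of this file): the dump is a raw array of 64-bit addresses in native
/// byte order with no header, so it can be consumed with plain fread().
#include <cstdint>
#include <cstdio>
[[maybe_unused]] static std::vector<uint64_t> readCoverageDumpSketch(const char * path)
{
    std::vector<uint64_t> addresses;
    if (FILE * f = std::fopen(path, "rb"))
    {
        uint64_t address;
        while (std::fread(&address, sizeof(address), 1, f) == 1)
            addresses.push_back(address);
        std::fclose(f);
    }
    return addresses;
}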
}
bool isClickhouseApp(std::string_view app_suffix, std::vector<char *> & argv)


@ -10,9 +10,24 @@ else ()
set (COMPRESSOR "${PROJECT_BINARY_DIR}/utils/self-extracting-executable/compressor")
endif ()
add_custom_target (self-extracting ALL add_custom_target (self-extracting-server ALL
${CMAKE_COMMAND} -E remove clickhouse clickhouse-stripped
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse ../clickhouse
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-stripped ../clickhouse-stripped
DEPENDS clickhouse clickhouse-stripped compressor
)
set(self_extracting_deps "self-extracting-server")
if (BUILD_STANDALONE_KEEPER)
add_custom_target (self-extracting-keeper ALL
${CMAKE_COMMAND} -E remove clickhouse-keeper
COMMAND ${COMPRESSOR} ${DECOMPRESSOR} clickhouse-keeper ../clickhouse-keeper
DEPENDS compressor clickhouse-keeper
)
list(APPEND self_extracting_deps "self-extracting-keeper")
endif()
add_custom_target (self-extracting ALL
DEPENDS ${self_extracting_deps}
)


@ -11,6 +11,7 @@
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Environment.h>
#include <Poco/Config.h>
#include <Common/Jemalloc.h>
#include <Common/scope_guard_safe.h>
#include <Common/logger_useful.h>
#include <base/phdr_cache.h>
@ -625,9 +626,35 @@ static void initializeAzureSDKLogger(
#endif
}
#if defined(SANITIZER)
static std::vector<String> getSanitizerNames()
{
std::vector<String> names;
#if defined(ADDRESS_SANITIZER)
names.push_back("address");
#endif
#if defined(THREAD_SANITIZER)
names.push_back("thread");
#endif
#if defined(MEMORY_SANITIZER)
names.push_back("memory");
#endif
#if defined(UNDEFINED_BEHAVIOR_SANITIZER)
names.push_back("undefined behavior");
#endif
return names;
}
#endif
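/// A minimal sketch (hypothetical helper, not part of this commit) of the warning text
/// assembled in Server::main below: fmt::join renders the detected sanitizers as a
/// comma-separated list, e.g. "Server was built with sanitizers (address, undefined behavior)".
#if defined(SANITIZER)
[[maybe_unused]] static String sanitizerWarningSketch()
{
    auto sanitizers = getSanitizerNames();
    if (sanitizers.empty())
        return "Server was built with sanitizer. It will work slowly.";
    if (sanitizers.size() == 1)
        return fmt::format("Server was built with {} sanitizer. It will work slowly.", sanitizers.front());
    return fmt::format("Server was built with sanitizers ({}). It will work slowly.", fmt::join(sanitizers, ", "));
}
#endif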
int Server::main(const std::vector<std::string> & /*args*/)
try
{
#if USE_JEMALLOC
setJemallocBackgroundThreads(true);
#endif
Stopwatch startup_watch;
Poco::Logger * log = &logger();
@ -711,7 +738,17 @@ try
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
#if defined(SANITIZER) #if defined(SANITIZER)
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); auto sanitizers = getSanitizerNames();
String log_message;
if (sanitizers.empty())
log_message = "sanitizer";
else if (sanitizers.size() == 1)
log_message = fmt::format("{} sanitizer", sanitizers.front());
else
log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", "));
global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message));
#endif
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE


@ -506,6 +506,14 @@ let user = 'default';
let password = '';
let add_http_cors_header = (location.protocol != 'file:');
const current_url = new URL(window.location);
/// Substitute user name if it's specified in the query string
const user_from_url = current_url.searchParams.get('user');
if (user_from_url) {
user = user_from_url;
}
const errorCodeMessageMap = {
516: 'Error authenticating with database. Please check your connection params and try again.'
}


@ -9,6 +9,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -164,32 +165,15 @@ private:
auto aggregate_function_clone = aggregate_function->clone();
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name); resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name);
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName()); resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
return arithmetic_function_clone;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
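/// The helper removed above is replaced by resolveAggregateFunctionNodeByName() from
/// Analyzer/Utils.h. A plausible sketch of it, inferred from the removed code and the
/// new call sites (the real implementation in this commit may differ):
static void resolveAggregateFunctionNodeByNameSketch(FunctionNode & function_node, const String & aggregate_function_name)
{
    /// Derive the argument types from the node's current arguments instead of
    /// passing them in explicitly, as the removed helper did.
    DataTypes argument_types;
    for (const auto & argument : function_node.getArguments().getNodes())
        argument_types.push_back(argument->getResultType());

    AggregateFunctionProperties properties;
    auto aggregate_function = AggregateFunctionFactory::instance().get(
        aggregate_function_name,
        function_node.getNullsAction(),
        argument_types,
        function_node.getAggregateFunction()->getParameters(),
        properties);
    function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}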
};
}


@ -11,6 +11,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -18,19 +19,18 @@ namespace DB
namespace
{
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor> class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>
{
public:
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_) using Base = InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>;
: context(std::move(context_)) using Base::Base;
{}
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{
return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
}
void visitImpl(QueryTreeNodePtr & node) const void enterImpl(QueryTreeNodePtr & node) const
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
@ -171,13 +171,13 @@ private:
{
auto result_function = std::make_shared<FunctionNode>("and");
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext());
if (comparison_function_name == "notEquals") if (comparison_function_name == "notEquals")
{ {
auto not_function = std::make_shared<FunctionNode>("not"); auto not_function = std::make_shared<FunctionNode>("not");
not_function->getArguments().getNodes().push_back(std::move(result_function)); not_function->getArguments().getNodes().push_back(std::move(result_function));
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext());
result_function = std::move(not_function);
}
@ -197,18 +197,10 @@ private:
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName()); resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext());
return comparison_function;
}
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, context);
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
ContextPtr context;
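/// A plausible sketch of resolveOrdinaryFunctionNodeByName() from Analyzer/Utils.h,
/// inferred from the helper removed above plus the explicit context argument at the
/// new call sites (the real implementation in this commit may differ):
static void resolveOrdinaryFunctionNodeByNameSketch(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
{
    auto function = FunctionFactory::instance().get(function_name, context);
    function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}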
};
}


@ -9,6 +9,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -77,11 +78,9 @@ public:
/// Replace `countDistinct` of the initial query with `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
};


@ -4,6 +4,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeVariant.h>
#include <Storages/IStorage.h>
@ -16,6 +17,9 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/JoinNode.h>
namespace DB
{
@ -23,202 +27,410 @@ namespace DB
namespace
{
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor> struct ColumnContext
{
NameAndTypePair column;
QueryTreeNodePtr column_source;
ContextPtr context;
};
using NodeToSubcolumnTransformer = std::function<void(QueryTreeNodePtr &, FunctionNode &, ColumnContext &)>;
void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
/// Replace `length(argument)` with `argument.size0`
/// `argument` may be Array or Map.
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
}
template <bool positive>
void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive
/// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive
/// `argument` may be Array or Map.
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, ctx.column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
const auto * function_name = positive ? "equals" : "notEquals";
resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context);
}
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
if (value.getType() == Field::Types::UInt64)
return data_type_tuple.getNameByPosition(value.get<UInt64>());
return "";
}
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
{
if (value.getType() == Field::Types::String)
return value.get<const String &>();
return "";
}
template <typename DataType>
void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`.
/// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`.
auto & function_arguments_nodes = function_node.getArguments().getNodes();
if (function_arguments_nodes.size() != 2)
return;
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (!second_argument_constant_node)
return;
const auto & data_type_concrete = assert_cast<const DataType &>(*ctx.column.type);
auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete);
if (subcolumn_name.empty())
return;
NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
}
std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transformers =
{
{
{TypeIndex::Array, "length"}, optimizeFunctionLength,
},
{
{TypeIndex::Array, "empty"}, optimizeFunctionEmpty<true>,
},
{
{TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty<false>,
},
{
{TypeIndex::Map, "length"}, optimizeFunctionLength,
},
{
{TypeIndex::Map, "empty"}, optimizeFunctionEmpty<true>,
},
{
{TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty<false>,
},
{
{TypeIndex::Map, "mapKeys"},
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Map, "mapValues"},
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Map, "mapContains"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
const auto & data_type_map = assert_cast<const DataTypeMap &>(*ctx.column.type);
NameAndTypePair column{ctx.column.name + ".keys", std::make_shared<DataTypeArray>(data_type_map.getKeyType())};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
auto has_function_argument = std::make_shared<ColumnNode>(column, ctx.column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context);
},
},
{
{TypeIndex::Nullable, "count"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
auto new_column_node = std::make_shared<ColumnNode>(column, ctx.column_source);
auto function_node_not = std::make_shared<FunctionNode>("not");
function_node_not->getArguments().getNodes().push_back(std::move(new_column_node));
resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context);
function_arguments_nodes = {std::move(function_node_not)};
resolveAggregateFunctionNodeByName(function_node, "sum");
},
},
{
{TypeIndex::Nullable, "isNull"},
[](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
node = std::make_shared<ColumnNode>(column, ctx.column_source);
},
},
{
{TypeIndex::Nullable, "isNotNull"},
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
auto & function_arguments_nodes = function_node.getArguments().getNodes();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, ctx.column_source)};
resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context);
},
},
{
{TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement<DataTypeTuple>,
},
{
{TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement<DataTypeVariant>,
},
};
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node)
return {};
auto & function_arguments_nodes = function_node->getArguments().getNodes();
if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
return {};
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set")
return {};
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return {};
const auto & storage = table_node->getStorage();
const auto & storage_snapshot = table_node->getStorageSnapshot();
auto column = first_argument_column_node->getColumn();
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
return {};
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name);
if (!column_in_table || !column_in_table->type->equals(*column.type))
return {};
return std::make_tuple(function_node, first_argument_column_node, table_node);
}
/// The first pass collects info about identifiers to determine which of them may be optimized.
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
{ {
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>; using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
using Base::Base;
void enterImpl(const QueryTreeNodePtr & node)
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
if (auto * table_node = node->as<TableNode>())
{
enterImpl(*table_node);
return;
}
if (auto * column_node = node->as<ColumnNode>())
{
enterImpl(*column_node);
return;
}
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
if (function_node && first_argument_node && table_node)
{
enterImpl(*function_node, *first_argument_node, *table_node);
return;
}
if (const auto * join_node = node->as<JoinNode>())
{
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls;
return;
}
if (const auto * query_node = node->as<QueryNode>())
{
if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets())
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls;
return;
}
}
std::unordered_set<Identifier> getIdentifiersToOptimize() const
{
if (can_wrap_result_columns_with_nullable)
{
/// Do not optimize if we have JOIN with setting join_use_null.
/// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls.
/// It may change the behaviour if a subcolumn can be converted
/// to Nullable while the original column cannot (e.g. for the Array type).
return {};
}
/// Do not optimize if the full column is requested in another context.
/// It doesn't make sense because it doesn't reduce the amount of read data,
/// and the optimized functions are not computation-heavy. But introducing a
/// new identifier complicates query analysis and may break it.
///
/// E.g. the query:
/// SELECT n FROM table GROUP BY n HAVING isNotNull(n)
/// may be optimized to the incorrect query:
/// SELECT n FROM table GROUP BY n HAVING not(n.null)
/// which will produce the error: `n.null` is not under aggregate function and not in GROUP BY keys.
///
/// Do not optimize index columns (primary, min-max, secondary),
/// because otherwise analysis of indexes may be broken.
/// TODO: handle subcolumns in index analysis.
std::unordered_set<Identifier> identifiers_to_optimize;
for (const auto & [identifier, count] : optimized_identifiers_count)
{
if (all_key_columns.contains(identifier))
continue;
auto it = identifiers_count.find(identifier);
if (it != identifiers_count.end() && it->second == count)
identifiers_to_optimize.insert(identifier);
}
return identifiers_to_optimize;
}
private:
std::unordered_set<Identifier> all_key_columns;
std::unordered_map<Identifier, UInt64> identifiers_count;
std::unordered_map<Identifier, UInt64> optimized_identifiers_count;
NameSet processed_tables;
bool can_wrap_result_columns_with_nullable = false;
void enterImpl(const TableNode & table_node)
{
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
/// Collect the key columns of each table only once.
if (!processed_tables.emplace(table_name).second)
return;
auto add_key_columns = [&](const auto & key_columns)
{
for (const auto & column_name : key_columns)
{
Identifier identifier({table_name, column_name});
all_key_columns.insert(identifier);
}
};
const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;
const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
add_key_columns(primary_key_columns);
add_key_columns(partition_key_columns);
for (const auto & index : metadata_snapshot->getSecondaryIndices())
{
const auto & index_columns = index.expression->getRequiredColumns();
add_key_columns(index_columns);
}
}
void enterImpl(const ColumnNode & column_node)
{
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source = column_node.getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column_node.getColumnName()});
++identifiers_count[qualified_name];
}
void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node)
{
/// For queries with FINAL, converting a function to a subcolumn may alter
/// special merging algorithms and produce a wrong query result.
if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
return;
const auto & column = first_argument_column_node.getColumn();
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
Identifier qualified_name({table_name, column.name});
if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()}))
++optimized_identifiers_count[qualified_name];
}
};
/// Second pass optimizes functions to subcolumns for allowed identifiers.
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
{
private:
std::unordered_set<Identifier> identifiers_to_optimize;
public:
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
using Base::Base;
FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set<Identifier> identifiers_to_optimize_)
: Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_))
{
}
void enterImpl(QueryTreeNodePtr & node) const
{
if (!getSettings().optimize_functions_to_subcolumns)
return;
auto * function_node = node->as<FunctionNode>(); auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
if (!function_node) if (!function_node || !first_argument_column_node || !table_node)
return;
auto & function_arguments_nodes = function_node->getArguments().getNodes();
size_t function_arguments_nodes_size = function_arguments_nodes.size();
if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2)
return;
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();
if (!table_node)
return;
const auto & storage = table_node->getStorage();
if (!storage->supportsSubcolumns())
return;
auto column = first_argument_column_node->getColumn();
WhichDataType column_type(column.type); auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
const auto & function_name = function_node->getFunctionName(); Identifier qualified_name({table_name, column.name});
if (!identifiers_to_optimize.contains(qualified_name))
return;
if (function_arguments_nodes_size == 1) auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()});
if (transformer_it != node_transformers.end())
{
if (column_type.isArray()) ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()};
{ transformer_it->second(node, *function_node, ctx);
if (function_name == "length")
{
/// Replace `length(array_argument)` with `array_argument.size0`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "empty")
{
/// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "equals");
}
else if (function_name == "notEmpty")
{
/// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)`
column.name += ".size0";
column.type = std::make_shared<DataTypeUInt64>();
function_arguments_nodes.clear();
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
resolveOrdinaryFunctionNode(*function_node, "notEquals");
}
}
else if (column_type.isNullable())
{
if (function_name == "isNull")
{
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "isNotNull")
{
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
column.name += ".null";
column.type = std::make_shared<DataTypeUInt8>();
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
resolveOrdinaryFunctionNode(*function_node, "not");
}
}
else if (column_type.isMap())
{
if (function_name == "mapKeys")
{
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
column.name += ".keys";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapValues")
{
/// Replace `mapValues(map_argument)` with `map_argument.values`
column.name += ".values";
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
}
} }
else
{
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
{
/** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)`
* with `tuple_argument.column_name`.
*/
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
const auto & data_type_tuple = assert_cast<const DataTypeTuple &>(*column.type);
String subcolumn_name;
if (tuple_element_constant_value_type == Field::Types::String)
{
subcolumn_name = tuple_element_constant_value.get<const String &>();
}
else if (tuple_element_constant_value_type == Field::Types::UInt64)
{
auto tuple_column_index = tuple_element_constant_value.get<UInt64>();
subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index);
}
else
{
return;
}
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
{
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
String subcolumn_name;
if (variant_element_constant_value.getType() != Field::Types::String)
return;
subcolumn_name = variant_element_constant_value.get<const String &>();
column.name += '.';
column.name += subcolumn_name;
column.type = function_node->getResultType();
node = std::make_shared<ColumnNode>(column, column_source);
}
else if (function_name == "mapContains" && column_type.isMap())
{
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
column.name += ".keys";
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
function_arguments_nodes[0] = std::move(has_function_argument);
resolveOrdinaryFunctionNode(*function_node, "has");
}
}
}
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
} }
};
@ -226,8 +438,15 @@ private:
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
{
FunctionToSubcolumnsVisitor visitor(context); FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
visitor.visit(query_tree_node); first_visitor.visit(query_tree_node);
auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
if (identifiers_to_optimize.empty())
return;
FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
second_visitor.visit(query_tree_node);
}
}


@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypesNumber.h>
@ -47,25 +48,17 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull()) if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{ {
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
else if (function_node->getFunctionName() == "sum" && else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 && first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1) first_argument_constant_literal.get<UInt64>() == 1)
{ {
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
resolveAggregateFunctionNodeByName(*function_node, "count");
}
}
private:
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}


@ -5,6 +5,7 @@
#include <Analyzer/ColumnNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>


@ -74,8 +74,7 @@ public:
new_arguments[1] = std::move(if_arguments_nodes[0]);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf( resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
else if (first_const_node)
@ -104,27 +103,10 @@ public:
new_arguments[1] = std::move(not_function);
function_arguments_nodes = std::move(new_arguments);
resolveAsAggregateFunctionWithIf( resolveAggregateFunctionNodeByName(*function_node, function_node->getFunctionName() + "If");
*function_node, {function_arguments_nodes[0]->getResultType(), function_arguments_nodes[1]->getResultType()});
}
}
}
private:
static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types)
{
auto result_type = function_node.getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + "If",
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}
};
}


@ -73,23 +73,24 @@ public:
const auto lhs = std::make_shared<FunctionNode>("sum"); const auto lhs = std::make_shared<FunctionNode>("sum");
lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*lhs, column_type); resolveAggregateFunctionNodeByName(*lhs, lhs->getFunctionName());
const auto rhs_count = std::make_shared<FunctionNode>("count"); const auto rhs_count = std::make_shared<FunctionNode>("count");
rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]);
resolveAsAggregateFunctionNode(*rhs_count, column_type); resolveAggregateFunctionNodeByName(*rhs_count, rhs_count->getFunctionName());
const auto rhs = std::make_shared<FunctionNode>("multiply"); const auto rhs = std::make_shared<FunctionNode>("multiply");
rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]);
rhs->getArguments().getNodes().push_back(rhs_count); rhs->getArguments().getNodes().push_back(rhs_count);
resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); resolveOrdinaryFunctionNodeByName(*rhs, rhs->getFunctionName(), getContext());
auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
if (column_id == 0)
new_node->getArguments().getNodes() = {lhs, rhs};
else if (column_id == 1)
new_node->getArguments().getNodes() = {rhs, lhs};
resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName());
resolveOrdinaryFunctionNodeByName(*new_node, new_node->getFunctionName(), getContext());
if (!new_node)
return;
@ -100,28 +101,7 @@ public:
res = createCastFunction(res, function_node->getResultType(), getContext()); res = createCastFunction(res, function_node->getResultType(), getContext());
node = std::move(res); node = std::move(res);
} }
private:
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
{
const auto function = FunctionFactory::instance().get(function_name, getContext());
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
}
static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(),
NullsAction::EMPTY,
{argument_type},
{},
properties);
function_node.resolveAsAggregateFunction(aggregate_function);
}
}; };
} }

View File

@@ -5,6 +5,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/IAggregateFunction.h>
+#include <Analyzer/Utils.h>
 #include <Functions/FunctionFactory.h>
@@ -65,7 +66,8 @@ public:
         auto multiplier_node = function_node_arguments_nodes[0];
         function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);
         function_node_arguments_nodes.resize(1);
-        resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
+        resolveAggregateFunctionNodeByName(*function_node, "countIf");
         if (constant_value_literal.get<UInt64>() != 1)
         {
@@ -115,7 +117,7 @@ public:
         function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
         function_node_arguments_nodes.resize(1);
-        resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
+        resolveAggregateFunctionNodeByName(*function_node, "countIf");
         if (if_true_condition_value != 1)
         {
@@ -144,7 +146,7 @@ public:
         function_node_arguments_nodes[0] = std::move(not_function);
         function_node_arguments_nodes.resize(1);
-        resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
+        resolveAggregateFunctionNodeByName(*function_node, "countIf");
         if (if_false_condition_value != 1)
         {
@@ -156,15 +158,6 @@ public:
     }
 private:
-    static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
-    {
-        AggregateFunctionProperties properties;
-        auto aggregate_function = AggregateFunctionFactory::instance().get(
-            "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
-        function_node.resolveAsAggregateFunction(std::move(aggregate_function));
-    }
     QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right)
     {
         auto multiply_function_node = std::make_shared<FunctionNode>("multiply");

View File

@@ -7,6 +7,7 @@
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/FunctionNode.h>
+#include <Analyzer/Utils.h>
 namespace DB

View File

@@ -7,6 +7,7 @@
 #include <Analyzer/FunctionNode.h>
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/QueryNode.h>
+#include <Analyzer/Utils.h>
 namespace DB
 {
@@ -184,11 +185,8 @@ public:
         /// Replace uniq of initial query to count
         if (match_subquery_with_distinct() || match_subquery_with_group_by())
         {
-            AggregateFunctionProperties properties;
-            auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
             function_node->getArguments().getNodes().clear();
-            function_node->resolveAsAggregateFunction(std::move(aggregate_function));
+            resolveAggregateFunctionNodeByName(*function_node, "count");
         }
     }
 };

View File

@@ -636,16 +636,16 @@ private:
     bool has_function = false;
 };
-inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_node)
+inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode & function_node, const String & function_name)
 {
     Array parameters;
-    for (const auto & param : function_node->getParameters())
+    for (const auto & param : function_node.getParameters())
     {
         auto * constant = param->as<ConstantNode>();
         parameters.push_back(constant->getValue());
     }
-    const auto & function_node_argument_nodes = function_node->getArguments().getNodes();
+    const auto & function_node_argument_nodes = function_node.getArguments().getNodes();
     DataTypes argument_types;
     argument_types.reserve(function_node_argument_nodes.size());
@@ -655,7 +655,7 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
     AggregateFunctionProperties properties;
     auto action = NullsAction::EMPTY;
-    return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
+    return AggregateFunctionFactory::instance().get(function_name, action, argument_types, parameters, properties);
 }
 }
@@ -736,11 +736,11 @@ void rerunFunctionResolve(FunctionNode * function_node, ContextPtr context)
 {
     if (name == "nothing" || name == "nothingUInt64" || name == "nothingNull")
         return;
-    function_node->resolveAsAggregateFunction(resolveAggregateFunction(function_node));
+    function_node->resolveAsAggregateFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
 }
 else if (function_node->isWindowFunction())
 {
-    function_node->resolveAsWindowFunction(resolveAggregateFunction(function_node));
+    function_node->resolveAsWindowFunction(resolveAggregateFunction(*function_node, function_node->getFunctionName()));
 }
 }
@@ -793,6 +793,18 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty
     return function_node;
 }
+void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context)
+{
+    auto function = FunctionFactory::instance().get(function_name, context);
+    function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
+}
+void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name)
+{
+    auto aggregate_function = resolveAggregateFunction(function_node, function_name);
+    function_node.resolveAsAggregateFunction(std::move(aggregate_function));
+}
 /** Returns:
  * {_, false} - multiple sources
  * {nullptr, true} - no sources (for constants)

View File

@@ -112,6 +112,14 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node);
 /// Wrap node into `_CAST` function
 QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context);
+/// Resolves function node as ordinary function with given name.
+/// Arguments and parameters are taken from the node.
+void resolveOrdinaryFunctionNodeByName(FunctionNode & function_node, const String & function_name, const ContextPtr & context);
+/// Resolves function node as aggregate function with given name.
+/// Arguments and parameters are taken from the node.
+void resolveAggregateFunctionNodeByName(FunctionNode & function_node, const String & function_name);
 /// Checks that node has only one source and returns it
 QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node);

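For reference, a minimal sketch of how an Analyzer pass can drive the two helpers declared above; the surrounding function, the node names, and the choice of "count"/"multiply" are illustrative only, just the helper signatures and the FunctionNode calls come from this diff:

    #include <Analyzer/FunctionNode.h>
    #include <Analyzer/Utils.h>

    using namespace DB;

    /// Hypothetical rewrite step (not part of this commit): re-resolve an
    /// existing aggregate node under a new name, then build and resolve an
    /// ordinary function node.
    static void exampleRewrite(FunctionNode & aggregate_node, QueryTreeNodePtr argument, const ContextPtr & context)
    {
        /// The helper pulls argument types and parameters from the node itself,
        /// so only the target aggregate name has to be supplied.
        resolveAggregateFunctionNodeByName(aggregate_node, "count");

        /// Ordinary functions additionally need the query context, since they
        /// are looked up through FunctionFactory.
        auto multiply = std::make_shared<FunctionNode>("multiply");
        multiply->getArguments().getNodes().push_back(std::move(argument));
        resolveOrdinaryFunctionNodeByName(*multiply, multiply->getFunctionName(), context);
    }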
View File

@@ -29,48 +29,49 @@ namespace ErrorCodes
 }
 BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
-    const StorageAzureConfiguration & configuration_,
+    const AzureBlobStorage::ConnectionParams & connection_params_,
+    const String & blob_path_,
     bool allow_azure_native_copy,
     const ReadSettings & read_settings_,
     const WriteSettings & write_settings_,
     const ContextPtr & context_)
     : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage"))
-    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
-    , configuration(configuration_)
+    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false}
+    , connection_params(connection_params_)
+    , blob_path(blob_path_)
 {
-    auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true);
-    client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
-    object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
-        std::move(client_ptr),
-        configuration.createSettings(context_),
-        configuration_.container,
-        configuration.getConnectionURL().toString());
+    auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false);
+    auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy);
+    object_storage = std::make_unique<AzureObjectStorage>(
+        "BackupReaderAzureBlobStorage",
+        std::move(client_ptr),
+        std::move(settings_ptr),
+        connection_params.getContainer(),
+        connection_params.getConnectionURL());
     client = object_storage->getAzureBlobStorageClient();
-    auto settings_copy = *object_storage->getSettings();
-    settings_copy.use_native_copy = allow_azure_native_copy;
-    settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
+    settings = object_storage->getSettings();
 }
 BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;
 bool BackupReaderAzureBlobStorage::fileExists(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return object_storage->exists(StoredObject(key));
 }
 UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
     return object_metadata.size_bytes;
 }
 std::unique_ptr<SeekableReadBuffer> BackupReaderAzureBlobStorage::readFile(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return std::make_unique<ReadBufferFromAzureBlobStorage>(
         client, key, read_settings, settings->max_single_read_retries,
         settings->max_single_download_retries);
@@ -85,23 +86,23 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
         && destination_data_source_description.is_encrypted == encrypted_in_backup)
     {
         LOG_TRACE(log, "Copying {} from AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName());
-        auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
+        auto write_blob_function = [&](const Strings & dst_blob_path, WriteMode mode, const std::optional<ObjectAttributes> &) -> size_t
         {
             /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
-            if (blob_path.size() != 2 || mode != WriteMode::Rewrite)
+            if (dst_blob_path.size() != 2 || mode != WriteMode::Rewrite)
                 throw Exception(ErrorCodes::LOGICAL_ERROR,
                     "Blob writing function called with unexpected blob_path.size={} or mode={}",
-                    blob_path.size(), mode);
+                    dst_blob_path.size(), mode);
             copyAzureBlobStorageFile(
                 client,
                 destination_disk->getObjectStorage()->getAzureBlobStorageClient(),
-                configuration.container,
-                fs::path(configuration.blob_path) / path_in_backup,
+                connection_params.getContainer(),
+                fs::path(blob_path) / path_in_backup,
                 0,
                 file_size,
-                /* dest_container */ blob_path[1],
-                /* dest_path */ blob_path[0],
+                /* dest_container */ dst_blob_path[1],
+                /* dest_path */ dst_blob_path[0],
                 settings,
                 read_settings,
                 threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupRDAzure"));
@@ -119,28 +120,33 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
 BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
-    const StorageAzureConfiguration & configuration_,
+    const AzureBlobStorage::ConnectionParams & connection_params_,
+    const String & blob_path_,
     bool allow_azure_native_copy,
     const ReadSettings & read_settings_,
     const WriteSettings & write_settings_,
     const ContextPtr & context_,
     bool attempt_to_create_container)
     : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
-    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
-    , configuration(configuration_)
+    , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, connection_params_.getConnectionURL(), false, false}
+    , connection_params(connection_params_)
+    , blob_path(blob_path_)
 {
-    auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container);
-    client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
-    object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
-        std::move(client_ptr),
-        configuration.createSettings(context_),
-        configuration.container,
-        configuration_.getConnectionURL().toString());
+    if (!attempt_to_create_container)
+        connection_params.endpoint.container_already_exists = true;
+    auto client_ptr = AzureBlobStorage::getContainerClient(connection_params, /*readonly=*/ false);
+    auto settings_ptr = AzureBlobStorage::getRequestSettingsForBackup(context_->getSettingsRef(), allow_azure_native_copy);
+    object_storage = std::make_unique<AzureObjectStorage>(
+        "BackupWriterAzureBlobStorage",
+        std::move(client_ptr),
+        std::move(settings_ptr),
+        connection_params.getContainer(),
+        connection_params.getConnectionURL());
     client = object_storage->getAzureBlobStorageClient();
-    auto settings_copy = *object_storage->getSettings();
-    settings_copy.use_native_copy = allow_azure_native_copy;
-    settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
+    settings = object_storage->getSettings();
 }
 void BackupWriterAzureBlobStorage::copyFileFromDisk(
@@ -159,18 +165,18 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(
 {
     /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container.
     /// In this case we can't use the native copy.
-    if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
+    if (auto src_blob_path = src_disk->getBlobPath(src_path); src_blob_path.size() == 2)
     {
         LOG_TRACE(log, "Copying file {} from disk {} to AzureBlobStorag", src_path, src_disk->getName());
         copyAzureBlobStorageFile(
             src_disk->getObjectStorage()->getAzureBlobStorageClient(),
             client,
-            /* src_container */ blob_path[1],
-            /* src_path */ blob_path[0],
+            /* src_container */ src_blob_path[1],
+            /* src_path */ src_blob_path[0],
             start_pos,
             length,
-            configuration.container,
-            fs::path(configuration.blob_path) / path_in_backup,
+            connection_params.getContainer(),
+            fs::path(blob_path) / path_in_backup,
             settings,
             read_settings,
             threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
@@ -188,11 +194,11 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
     copyAzureBlobStorageFile(
         client,
         client,
-        configuration.container,
-        fs::path(configuration.blob_path)/ source,
+        connection_params.getContainer(),
+        fs::path(blob_path)/ source,
         0,
         size,
-        /* dest_container */ configuration.container,
+        /* dest_container */ connection_params.getContainer(),
         /* dest_path */ destination,
         settings,
         read_settings,
@@ -206,22 +212,28 @@ void BackupWriterAzureBlobStorage::copyDataToFile(
     UInt64 length)
 {
     copyDataToAzureBlobStorageFile(
-        create_read_buffer, start_pos, length, client, configuration.container,
-        fs::path(configuration.blob_path) / path_in_backup, settings,
-        threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
+        create_read_buffer,
+        start_pos,
+        length,
+        client,
+        connection_params.getContainer(),
+        fs::path(blob_path) / path_in_backup,
+        settings,
+        threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(),
+        "BackupWRAzure"));
 }
 BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
 bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return object_storage->exists(StoredObject(key));
 }
 UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     RelativePathsWithMetadata children;
     object_storage->listObjects(key,children,/*max_keys*/0);
     if (children.empty())
@@ -231,7 +243,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
 std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return std::make_unique<ReadBufferFromAzureBlobStorage>(
         client, key, read_settings, settings->max_single_read_retries,
         settings->max_single_download_retries);
@@ -239,7 +251,7 @@ std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String
 std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     return std::make_unique<WriteBufferFromAzureBlobStorage>(
         client,
         key,
@@ -251,7 +263,7 @@ std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const Strin
 void BackupWriterAzureBlobStorage::removeFile(const String & file_name)
 {
-    String key = fs::path(configuration.blob_path) / file_name;
+    String key = fs::path(blob_path) / file_name;
     StoredObject object(key);
     object_storage->removeObjectIfExists(object);
 }
@@ -260,7 +272,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names)
 {
     StoredObjects objects;
     for (const auto & file_name : file_names)
-        objects.emplace_back(fs::path(configuration.blob_path) / file_name);
+        objects.emplace_back(fs::path(blob_path) / file_name);
     object_storage->removeObjectsIfExist(objects);
@@ -270,7 +282,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names)
 {
     StoredObjects objects;
     for (const auto & file_name : file_names)
-        objects.emplace_back(fs::path(configuration.blob_path) / file_name);
+        objects.emplace_back(fs::path(blob_path) / file_name);
     object_storage->removeObjectsIfExist(objects);
 }

View File

@@ -1,12 +1,10 @@
 #pragma once
 #include "config.h"
 #if USE_AZURE_BLOB_STORAGE
 #include <Backups/BackupIO_Default.h>
 #include <Disks/DiskType.h>
-#include <Interpreters/Context_fwd.h>
-#include <Storages/ObjectStorage/Azure/Configuration.h>
+#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
 namespace DB
@@ -17,7 +15,8 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault
 {
 public:
     BackupReaderAzureBlobStorage(
-        const StorageAzureConfiguration & configuration_,
+        const AzureBlobStorage::ConnectionParams & connection_params_,
+        const String & blob_path_,
         bool allow_azure_native_copy,
         const ReadSettings & read_settings_,
         const WriteSettings & write_settings_,
@@ -40,16 +39,18 @@ public:
 private:
     const DataSourceDescription data_source_description;
     std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
-    StorageAzureConfiguration configuration;
+    AzureBlobStorage::ConnectionParams connection_params;
+    String blob_path;
     std::unique_ptr<AzureObjectStorage> object_storage;
-    std::shared_ptr<const AzureObjectStorageSettings> settings;
+    std::shared_ptr<const AzureBlobStorage::RequestSettings> settings;
 };
 class BackupWriterAzureBlobStorage : public BackupWriterDefault
 {
 public:
     BackupWriterAzureBlobStorage(
-        const StorageAzureConfiguration & configuration_,
+        const AzureBlobStorage::ConnectionParams & connection_params_,
+        const String & blob_path_,
         bool allow_azure_native_copy,
         const ReadSettings & read_settings_,
         const WriteSettings & write_settings_,
@@ -87,9 +88,10 @@ private:
     const DataSourceDescription data_source_description;
     std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
-    StorageAzureConfiguration configuration;
+    AzureBlobStorage::ConnectionParams connection_params;
+    String blob_path;
     std::unique_ptr<AzureObjectStorage> object_storage;
-    std::shared_ptr<const AzureObjectStorageSettings> settings;
+    std::shared_ptr<const AzureBlobStorage::RequestSettings> settings;
 };
 }

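A usage sketch for the reworked interface (not from this commit): the field names and helpers mirror the hunks above, while the concrete values and the locals account_name, account_key, request_settings, read_settings, write_settings and context are placeholders:

    // Hypothetical call site; all values below are placeholders.
    AzureBlobStorage::ConnectionParams connection_params;
    connection_params.endpoint.storage_account_url = "https://account.blob.core.windows.net";
    connection_params.endpoint.container_name = "backups";
    connection_params.auth_method = std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name, account_key);
    connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true);

    // The blob path is now passed separately instead of living inside a
    // StorageAzureConfiguration object.
    auto reader = std::make_shared<BackupReaderAzureBlobStorage>(
        connection_params,
        /*blob_path_=*/ "backup_folder",
        /*allow_azure_native_copy=*/ true,
        read_settings,
        write_settings,
        context);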
View File

@@ -5,6 +5,7 @@
 #if USE_AZURE_BLOB_STORAGE
 #include <Backups/BackupIO_AzureBlobStorage.h>
+#include <Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h>
 #include <Backups/BackupImpl.h>
 #include <IO/Archives/hasRegisteredArchiveFileExtension.h>
 #include <Interpreters/Context.h>
@@ -49,7 +50,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
     const String & id_arg = params.backup_info.id_arg;
     const auto & args = params.backup_info.args;
-    StorageAzureConfiguration configuration;
+    String blob_path;
+    AzureBlobStorage::ConnectionParams connection_params;
+    auto request_settings = AzureBlobStorage::getRequestSettings(params.context->getSettingsRef());
     if (!id_arg.empty())
     {
@@ -59,55 +62,42 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
         if (!config.has(config_prefix))
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg);
-        if (config.has(config_prefix + ".connection_string"))
-        {
-            configuration.connection_url = config.getString(config_prefix + ".connection_string");
-            configuration.is_connection_string = true;
-            configuration.container = config.getString(config_prefix + ".container");
-        }
-        else
-        {
-            configuration.connection_url = config.getString(config_prefix + ".storage_account_url");
-            configuration.is_connection_string = false;
-            configuration.container = config.getString(config_prefix + ".container");
-            configuration.account_name = config.getString(config_prefix + ".account_name");
-            configuration.account_key = config.getString(config_prefix + ".account_key");
-            if (config.has(config_prefix + ".account_name") && config.has(config_prefix + ".account_key"))
-            {
-                configuration.account_name = config.getString(config_prefix + ".account_name");
-                configuration.account_key = config.getString(config_prefix + ".account_key");
-            }
-        }
+        connection_params =
+        {
+            .endpoint = AzureBlobStorage::processEndpoint(config, config_prefix),
+            .auth_method = AzureBlobStorage::getAuthMethod(config, config_prefix),
+            .client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true),
+        };
         if (args.size() > 1)
             throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
         if (args.size() == 1)
-            configuration.setPath(args[0].safeGet<String>());
+            blob_path = args[0].safeGet<String>();
     }
     else
     {
         if (args.size() == 3)
         {
-            configuration.connection_url = args[0].safeGet<String>();
-            configuration.is_connection_string = !configuration.connection_url.starts_with("http");
-            configuration.container = args[1].safeGet<String>();
-            configuration.blob_path = args[2].safeGet<String>();
+            auto connection_url = args[0].safeGet<String>();
+            auto container_name = args[1].safeGet<String>();
+            blob_path = args[2].safeGet<String>();
+            AzureBlobStorage::processURL(connection_url, container_name, connection_params.endpoint, connection_params.auth_method);
+            connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true);
         }
         else if (args.size() == 5)
         {
-            configuration.connection_url = args[0].safeGet<String>();
-            configuration.is_connection_string = false;
-            configuration.container = args[1].safeGet<String>();
-            configuration.blob_path = args[2].safeGet<String>();
-            configuration.account_name = args[3].safeGet<String>();
-            configuration.account_key = args[4].safeGet<String>();
+            connection_params.endpoint.storage_account_url = args[0].safeGet<String>();
+            connection_params.endpoint.container_name = args[1].safeGet<String>();
+            blob_path = args[2].safeGet<String>();
+            auto account_name = args[3].safeGet<String>();
+            auto account_key = args[4].safeGet<String>();
+            connection_params.auth_method = std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name, account_key);
+            connection_params.client_options = AzureBlobStorage::getClientOptions(*request_settings, /*for_disk=*/ true);
         }
         else
         {
@@ -117,16 +107,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
     }
     BackupImpl::ArchiveParams archive_params;
-    if (hasRegisteredArchiveFileExtension(configuration.getPath()))
+    if (hasRegisteredArchiveFileExtension(blob_path))
     {
         if (params.is_internal_backup)
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
-        auto path = configuration.getPath();
-        auto filename = removeFileNameFromURL(path);
-        configuration.setPath(path);
-        archive_params.archive_name = filename;
+        archive_params.archive_name = removeFileNameFromURL(blob_path);
         archive_params.compression_method = params.compression_method;
         archive_params.compression_level = params.compression_level;
         archive_params.password = params.password;
@@ -141,7 +127,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
     if (params.open_mode == IBackup::OpenMode::READ)
     {
         auto reader = std::make_shared<BackupReaderAzureBlobStorage>(
-            configuration,
+            connection_params,
+            blob_path,
             params.allow_azure_native_copy,
             params.read_settings,
            params.write_settings,
@@ -159,7 +146,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
     else
     {
         auto writer = std::make_shared<BackupWriterAzureBlobStorage>(
-            configuration,
+            connection_params,
+            blob_path,
             params.allow_azure_native_copy,
             params.read_settings,
             params.write_settings,

View File

@@ -607,6 +607,10 @@ if (TARGET ch_contrib::usearch)
     dbms_target_link_libraries(PUBLIC ch_contrib::usearch)
 endif()
+if (TARGET ch_contrib::prometheus_protobufs)
+    dbms_target_link_libraries (PUBLIC ch_contrib::prometheus_protobufs)
+endif()
 if (TARGET ch_rust::skim)
     dbms_target_include_directories(PRIVATE $<TARGET_PROPERTY:ch_rust::skim,INTERFACE_INCLUDE_DIRECTORIES>)
     dbms_target_link_libraries(PUBLIC ch_rust::skim)

View File

@@ -6,13 +6,13 @@
 #include <Common/ProgressIndication.h>
 #include <Common/InterruptListener.h>
 #include <Common/ShellCommand.h>
-#include <Common/QueryFuzzer.h>
 #include <Common/Stopwatch.h>
 #include <Common/DNSResolver.h>
 #include <Core/ExternalTable.h>
 #include <Poco/Util/Application.h>
 #include <Interpreters/Context.h>
 #include <Client/Suggest.h>
+#include <Client/QueryFuzzer.h>
 #include <boost/program_options.hpp>
 #include <Storages/StorageFile.h>
 #include <Storages/SelectQueryInfo.h>

View File

@@ -195,6 +195,12 @@ void HedgedConnections::sendQuery(
         modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset;
     }
+    /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting.
+    /// Make the analyzer being set, so it will be effectively applied on the remote server.
+    /// In other words, the initiator always controls whether the analyzer enabled or not for
+    /// all servers involved in the distributed query processing.
+    modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
     replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {});
     replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout);
     replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout);

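Why the added set() call works, as a short sketch (under the assumption that only settings marked as changed are serialized and sent along with the query):

    /// Re-assigning the current value through set() flips the setting's
    /// "changed" flag, so the initiator's analyzer choice is shipped to every
    /// remote replica instead of being left to the remote server's default.
    Settings modified_settings = settings;
    modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));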
View File

@@ -150,6 +150,12 @@ void MultiplexedConnections::sendQuery(
         }
     }
+    /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting.
+    /// Make the analyzer being set, so it will be effectively applied on the remote server.
+    /// In other words, the initiator always controls whether the analyzer enabled or not for
+    /// all servers involved in the distributed query processing.
+    modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
     const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
     size_t num_replicas = replica_states.size();

Some files were not shown because too many files have changed in this diff.