Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix-ubsan

This commit is contained in:
flynn 2024-12-12 11:36:40 +00:00
commit 49bcd726a4
945 changed files with 22316 additions and 6454 deletions

View File

@ -226,7 +226,7 @@
* `CREATE TABLE AS` will copy `PRIMARY KEY`, `ORDER BY`, and similar clauses (of `MergeTree` tables). [#69739](https://github.com/ClickHouse/ClickHouse/pull/69739) ([sakulali](https://github.com/sakulali)).
* Support 64-bit XID in Keeper. It can be enabled with the `use_xid_64` configuration value. [#69908](https://github.com/ClickHouse/ClickHouse/pull/69908) ([Antonio Andelic](https://github.com/antonio2368)).
* Command-line arguments for Bool settings are set to true when no value is provided for the argument (e.g. `clickhouse-client --optimize_aggregation_in_order --query "SELECT 1"`). [#70459](https://github.com/ClickHouse/ClickHouse/pull/70459) ([davidtsuk](https://github.com/davidtsuk)).
* Added user-level settings `min_free_disk_bytes_to_throw_insert` and `min_free_disk_ratio_to_throw_insert` to prevent insertions on disks that are almost full. [#69755](https://github.com/ClickHouse/ClickHouse/pull/69755) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Added user-level settings `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` to prevent insertions on disks that are almost full. [#69755](https://github.com/ClickHouse/ClickHouse/pull/69755) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Embedded documentation for settings will be strictly more detailed and complete than the documentation on the website. This is the first step before making the website documentation always auto-generated from the source code. This has far-reaching implications: it is guaranteed to include every setting; default values can never become obsolete; the documentation can be generated for each ClickHouse version; and it can be displayed by the server itself even without Internet access. The docs on the website are now generated from the source code. [#70289](https://github.com/ClickHouse/ClickHouse/pull/70289) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow empty needle in the function `replace`, matching the behavior of PostgreSQL. [#69918](https://github.com/ClickHouse/ClickHouse/pull/69918) ([zhanglistar](https://github.com/zhanglistar)).
* Allow empty needle in functions `replaceRegexp*`. [#70053](https://github.com/ClickHouse/ClickHouse/pull/70053) ([zhanglistar](https://github.com/zhanglistar)).
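A quick sketch of the new empty-needle behavior (illustrative values; the result comments assume the PostgreSQL-compatible semantics described above):

```sql
-- Previously an empty needle raised an exception; now, as in PostgreSQL,
-- replace() returns the input unchanged when the needle is empty.
SELECT replace('ClickHouse', '', 'x');           -- expected: 'ClickHouse'
SELECT replaceRegexpAll('ClickHouse', '', 'x');  -- replaceRegexp* functions also accept an empty needle now
```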

View File

@ -89,7 +89,6 @@ string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
option (ENABLE_FUZZING "Fuzzy testing using libfuzzer" OFF)
option (ENABLE_FUZZER_TEST "Build testing fuzzers in order to test libFuzzer functionality" OFF)
if (ENABLE_FUZZING)
# Also set WITH_COVERAGE=1 for better fuzzing process
@ -592,6 +591,20 @@ endif()
include (cmake/sanitize_targets.cmake)
if (COMPILER_CACHE STREQUAL "chcache")
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT ON)
get_all_targets(all_targets)
set (chcache_targets _cargo-build_chcache cargo-build_chcache cargo-prebuild_chcache)
foreach(target ${all_targets})
if (target IN_LIST chcache_targets)
continue()
endif()
add_dependencies(${target} cargo-build_chcache)
endforeach()
endif()
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS

View File

@ -35,7 +35,7 @@ curl https://clickhouse.com/ | sh
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.10 Community Call](https://clickhouse.com/company/events/v24-10-community-release-call) - October 31
* [v24.12 Community Call](https://clickhouse.com/company/events/v24-12-community-release-call) - December 19
## Upcoming Events
@ -43,7 +43,6 @@ Keep an eye out for upcoming meetups and events around the world. Somewhere else
Upcoming meetups
* [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3
* [Stockholm Meetup](https://www.meetup.com/clickhouse-stockholm-user-group/events/304382411) - December 9
* [New York Meetup](https://www.meetup.com/clickhouse-new-york-user-group/events/304268174) - December 9
* [Kuala Lumpur Meetup](https://www.meetup.com/clickhouse-malaysia-meetup-group/events/304576472/) - December 11
@ -52,6 +51,7 @@ Upcoming meetups
Recently completed meetups
* [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - November 12

View File

@ -65,7 +65,6 @@ TRAP(gethostbyaddr)
TRAP(gethostbyname)
TRAP(gethostbyname2)
TRAP(gethostent)
TRAP(getlogin)
TRAP(getnetbyaddr)
TRAP(getnetbyname)
TRAP(getnetent)
@ -291,6 +290,7 @@ TRAP(putenv)
TRAP(setlogmask)
TRAP(rand)
TRAP(getmntent)
TRAP(getlogin)
#endif
#endif

View File

@ -103,6 +103,26 @@ namespace Net
///
/// The default limit is 100.
int getNameLengthLimit() const;
/// Returns the maximum length of a field name.
///
/// See setNameLengthLimit() for more information.
void setNameLengthLimit(int limit);
/// Sets the maximum length of a field name.
///
/// The default limit is 256.
int getValueLengthLimit() const;
/// Returns the maximum length of a field value.
///
/// See setValueLengthLimit() for more information.
void setValueLengthLimit(int limit);
/// Sets the maximum length of a field value.
///
/// The default limit is 8192.
bool hasToken(const std::string & fieldName, const std::string & token) const;
/// Returns true iff the field with the given fieldName contains
/// the given token. Tokens in a header field are expected to be
@ -157,12 +177,14 @@ namespace Net
enum Limits
/// Limits for basic sanity checks when reading a header
{
MAX_NAME_LENGTH = 256,
MAX_VALUE_LENGTH = 8192,
DFL_NAME_LENGTH_LIMIT = 256,
DFL_VALUE_LENGTH_LIMIT = 8192,
DFL_FIELD_LIMIT = 100
};
int _fieldLimit;
int _nameLengthLimit;
int _valueLengthLimit;
};

View File

@ -28,14 +28,18 @@ namespace Net {
MessageHeader::MessageHeader():
_fieldLimit(DFL_FIELD_LIMIT)
_fieldLimit(DFL_FIELD_LIMIT),
_nameLengthLimit(DFL_NAME_LENGTH_LIMIT),
_valueLengthLimit(DFL_VALUE_LENGTH_LIMIT)
{
}
MessageHeader::MessageHeader(const MessageHeader& messageHeader):
NameValueCollection(messageHeader),
_fieldLimit(DFL_FIELD_LIMIT)
_fieldLimit(DFL_FIELD_LIMIT),
_nameLengthLimit(DFL_NAME_LENGTH_LIMIT),
_valueLengthLimit(DFL_VALUE_LENGTH_LIMIT)
{
}
@ -80,12 +84,12 @@ void MessageHeader::read(std::istream& istr)
throw MessageException("Too many header fields");
name.clear();
value.clear();
while (ch != eof && ch != ':' && ch != '\n' && name.length() < MAX_NAME_LENGTH) { name += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != ':' && ch != '\n' && name.length() < _nameLengthLimit) { name += ch; ch = buf.sbumpc(); }
if (ch == '\n') { ch = buf.sbumpc(); continue; } // ignore invalid header lines
if (ch != ':') throw MessageException("Field name too long/no colon found");
if (ch != eof) ch = buf.sbumpc(); // ':'
while (ch != eof && Poco::Ascii::isSpace(ch) && ch != '\r' && ch != '\n') ch = buf.sbumpc();
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < MAX_VALUE_LENGTH) { value += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < _valueLengthLimit) { value += ch; ch = buf.sbumpc(); }
if (ch == '\r') ch = buf.sbumpc();
if (ch == '\n')
ch = buf.sbumpc();
@ -93,7 +97,7 @@ void MessageHeader::read(std::istream& istr)
throw MessageException("Field value too long/no CRLF found");
while (ch == ' ' || ch == '\t') // folding
{
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < MAX_VALUE_LENGTH) { value += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < _valueLengthLimit) { value += ch; ch = buf.sbumpc(); }
if (ch == '\r') ch = buf.sbumpc();
if (ch == '\n')
ch = buf.sbumpc();
@ -122,6 +126,32 @@ void MessageHeader::setFieldLimit(int limit)
}
int MessageHeader::getNameLengthLimit() const
{
return _nameLengthLimit;
}
void MessageHeader::setNameLengthLimit(int limit)
{
poco_assert(limit >= 0);
_nameLengthLimit = limit;
}
int MessageHeader::getValueLengthLimit() const
{
return _valueLengthLimit;
}
void MessageHeader::setValueLengthLimit(int limit)
{
poco_assert(limit >= 0);
_valueLengthLimit = limit;
}
bool MessageHeader::hasToken(const std::string& fieldName, const std::string& token) const
{
std::string field = get(fieldName, "");

View File

@ -120,6 +120,12 @@ setup_aws_credentials() {
local minio_root_user=${MINIO_ROOT_USER:-clickhouse}
local minio_root_password=${MINIO_ROOT_PASSWORD:-clickhouse}
mkdir -p ~/.aws
if [[ -f ~/.aws/credentials ]]; then
if grep -q "^\[default\]" ~/.aws/credentials; then
echo "The credentials file contains a [default] section."
return
fi
fi
cat <<EOT >> ~/.aws/credentials
[default]
aws_access_key_id=${minio_root_user}

View File

@ -9,7 +9,7 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA
return()
endif()
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (sccache, then ccache), 'ccache', 'sccache', or 'disabled'")
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (sccache, then ccache), 'ccache', 'sccache', 'chcache', or 'disabled'")
if(COMPILER_CACHE STREQUAL "auto")
find_program (CCACHE_EXECUTABLE NAMES sccache ccache)
@ -17,11 +17,25 @@ elseif (COMPILER_CACHE STREQUAL "ccache")
find_program (CCACHE_EXECUTABLE ccache)
elseif(COMPILER_CACHE STREQUAL "sccache")
find_program (CCACHE_EXECUTABLE sccache)
elseif(COMPILER_CACHE STREQUAL "chcache")
list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
find_package(Rust REQUIRED)
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")
corrosion_import_crate(
MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/chcache/Cargo.toml
PROFILE release
LOCKED
FLAGS --offline
)
set_target_properties(chcache PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/programs/)
set(CCACHE_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/programs/chcache)
elseif(COMPILER_CACHE STREQUAL "disabled")
message(STATUS "Using *ccache: no (disabled via configuration)")
return()
else()
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|sccache|ccache|disabled), value: '${COMPILER_CACHE}'")
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|sccache|ccache|chcache|disabled), value: '${COMPILER_CACHE}'")
endif()
@ -60,6 +74,9 @@ if (CCACHE_EXECUTABLE MATCHES "/ccache$")
elseif(CCACHE_EXECUTABLE MATCHES "/sccache$")
message(STATUS "Using sccache: ${CCACHE_EXECUTABLE}")
set(LAUNCHER ${CCACHE_EXECUTABLE})
elseif(CCACHE_EXECUTABLE MATCHES "/chcache$")
message(STATUS "Using chcache: ${CCACHE_EXECUTABLE}")
set(LAUNCHER ${CCACHE_EXECUTABLE})
endif()
set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59
Subproject commit 71169aeec91b41c1bd5cf78fad6158dacdcde9d5

View File

@ -1,4 +1,4 @@
if (OS_LINUX)
if (OS_LINUX OR OS_DARWIN)
option(ENABLE_ICU "Enable ICU" ${ENABLE_LIBRARIES})
else ()
option(ENABLE_ICU "Enable ICU" 0)
@ -476,11 +476,14 @@ set(ICUI18N_SOURCES
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
enable_language(ASM)
if (ARCH_S390X)
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S" )
else()
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S" )
endif()
if (OS_DARWIN)
# Fine for both x86 and ARM
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/darwin_x86_64/icudt75l_dat.S")
elseif (ARCH_S390X)
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75b_dat.S")
else ()
set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt75l_dat.S")
endif ()
# ^^ you might wonder how the same assembly files can be used for different little-endian platforms (x86, ARM).
# These files are indeed assembly, but they only contain data ('.long' directives), which makes them portable across CPUs.
# Only the endianness and the character set (ASCII, EBCDIC) makes a difference, also see

2
contrib/icudata vendored

@ -1 +1 @@
Subproject commit 4904951339a70b4814d2d3723436b20d079cb01b
Subproject commit cfc05b4c3140ff2be84291b80de8c62b1e42d0da

2
contrib/rust_vendor vendored

@ -1 +1 @@
Subproject commit 08e82ca6543683abe4770305ad811a942186a520
Subproject commit b25b16b0b10a1cbb33eb0922f77aeedb72119792

View File

@ -13,8 +13,8 @@ ENV CARGO_HOME=/rust/cargo
ENV PATH="/rust/cargo/bin:${PATH}"
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
chmod 777 -R /rust && \
rustup toolchain install nightly-2024-04-01 && \
rustup default nightly-2024-04-01 && \
rustup toolchain install nightly-2024-12-01 && \
rustup default nightly-2024-12-01 && \
rustup toolchain remove stable && \
rustup component add rust-src && \
rustup target add x86_64-unknown-linux-gnu && \

View File

@ -36,7 +36,7 @@ geomet==0.2.1.post1
grpcio-tools==1.60.0
grpcio==1.60.0
gssapi==1.8.3
httplib2==0.20.2
httplib2==0.22.0
idna==3.7
importlib-metadata==4.6.4
iniconfig==2.0.0
@ -72,7 +72,7 @@ pyarrow==17.0.0
pycparser==2.22
pycryptodome==3.20.0
pymongo==3.11.0
pyparsing==2.4.7
pyparsing==3.1.0
pyspark==3.3.2
pyspnego==0.10.2
pytest-order==1.0.0
@ -101,3 +101,5 @@ wadllib==1.3.6
websocket-client==1.8.0
wheel==0.38.1
zipp==1.0.0
pyiceberg==0.7.1
jinja2==3.1.3

View File

@ -65,7 +65,7 @@ sidebar_label: 2024
* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/69346. Point 4 described there now works as well. [#69563](https://github.com/ClickHouse/ClickHouse/pull/69563) ([Vitaly Baranov](https://github.com/vitlibar)).
* Implement generic SerDe between Avro Union and ClickHouse Variant type. Resolves [#69713](https://github.com/ClickHouse/ClickHouse/issues/69713). [#69712](https://github.com/ClickHouse/ClickHouse/pull/69712) ([Jiří Kozlovský](https://github.com/jirislav)).
* 1. `CREATE TABLE AS` will copy `PRIMARY KEY`, `ORDER BY`, and similar clauses. Currently it is supported only for the MergeTree family of table engines. 2. For example, the following SQL statement used to trigger an exception, but this PR fixes it: if the destination table does not provide an `ORDER BY` or `PRIMARY KEY` expression in the table definition, it is copied from the source table (see the sketch after this list). [#69739](https://github.com/ClickHouse/ClickHouse/pull/69739) ([sakulali](https://github.com/sakulali)).
* Added user-level settings `min_free_disk_bytes_to_throw_insert` and `min_free_disk_ratio_to_throw_insert` to prevent insertions on disks that are almost full. [#69755](https://github.com/ClickHouse/ClickHouse/pull/69755) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Added user-level settings `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` to prevent insertions on disks that are almost full. [#69755](https://github.com/ClickHouse/ClickHouse/pull/69755) ([Marco Vilas Boas](https://github.com/marco-vb)).
* If you run `clickhouse-client` or another CLI application and it starts up slowly due to an overloaded server, and you start typing your query, such as `SELECT`, previous versions would display the remaining terminal echo contents before printing the greeting message, e.g. `SELECTClickHouse local version 24.10.1.1.` instead of `ClickHouse local version 24.10.1.1.`. Now it is fixed. This closes [#31696](https://github.com/ClickHouse/ClickHouse/issues/31696). [#69856](https://github.com/ClickHouse/ClickHouse/pull/69856) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a new column `readonly_duration` to the `system.replicas` table. Needed to be able to distinguish actual read-only replicas from sentinel ones in alerts. [#69871](https://github.com/ClickHouse/ClickHouse/pull/69871) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Change the type of the join-to-sort settings to unsigned int. [#69886](https://github.com/ClickHouse/ClickHouse/pull/69886) ([kevinyhzou](https://github.com/KevinyhZou)).
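A hedged sketch of the `CREATE TABLE AS` change above (table names are hypothetical):

```sql
CREATE TABLE t_src (id UInt64, ts DateTime) ENGINE = MergeTree ORDER BY id;

-- Previously this raised an exception because no ORDER BY / PRIMARY KEY was given;
-- now those clauses are copied from t_src.
CREATE TABLE t_copy AS t_src;
```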

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.10.3.21-stable (e668b927efb) FIXME as compared to v24.10.2.80-stable (96b80057159)
#### Improvement
* Backported in [#72100](https://github.com/ClickHouse/ClickHouse/issues/72100): Fix the metadata_version record in ZooKeeper in restarting thread rather than in attach thread. [#70297](https://github.com/ClickHouse/ClickHouse/pull/70297) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#72169](https://github.com/ClickHouse/ClickHouse/issues/72169): Forbid Dynamic/Variant types in min/max functions to avoid confusion. [#71761](https://github.com/ClickHouse/ClickHouse/pull/71761) ([Pavel Kruglov](https://github.com/Avogar)).
* Backported in [#72064](https://github.com/ClickHouse/ClickHouse/issues/72064): When retrieving data directly from a dictionary using Dictionary storage, dictionary table function, or direct SELECT from the dictionary itself, it is now enough to have `SELECT` permission or `dictGet` permission for the dictionary. This aligns with previous attempts to prevent ACL bypasses: https://github.com/ClickHouse/ClickHouse/pull/57362 and https://github.com/ClickHouse/ClickHouse/pull/65359. It also makes the latter one backward compatible. [#72051](https://github.com/ClickHouse/ClickHouse/pull/72051) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
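A minimal sketch of the access-control change above, assuming a hypothetical dictionary `default.my_dict` with an attribute `value` and a user `alice`:

```sql
-- Either of these grants is now sufficient to read the dictionary directly:
GRANT dictGet ON default.my_dict TO alice;
GRANT SELECT ON default.my_dict TO alice;

-- Direct reads such as these are then allowed:
SELECT * FROM default.my_dict;
SELECT dictGet('default.my_dict', 'value', toUInt64(1));
```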
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#72144](https://github.com/ClickHouse/ClickHouse/issues/72144): Acquiring zero-copy shared lock before moving a part to zero-copy disk to prevent possible data loss if Keeper is unavailable. [#71845](https://github.com/ClickHouse/ClickHouse/pull/71845) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#72088](https://github.com/ClickHouse/ClickHouse/issues/72088): Fix rows_processed column in system.s3/azure_queue_log broken in 24.6. Closes [#69975](https://github.com/ClickHouse/ClickHouse/issues/69975). [#71946](https://github.com/ClickHouse/ClickHouse/pull/71946) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#72036](https://github.com/ClickHouse/ClickHouse/issues/72036): Fix `Illegal type` error for `MergeTree` tables with binary monotonic function in `ORDER BY` when the first argument is constant. Fixes [#71941](https://github.com/ClickHouse/ClickHouse/issues/71941). [#71966](https://github.com/ClickHouse/ClickHouse/pull/71966) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#72207](https://github.com/ClickHouse/ClickHouse/issues/72207): Fixed incorrect settings order `max_parser_depth` and `max_parser_backtracks`. [#71498](https://github.com/ClickHouse/ClickHouse/pull/71498) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#72071](https://github.com/ClickHouse/ClickHouse/issues/72071): Fix client syntax highlighting that was broken in https://github.com/ClickHouse/ClickHouse/pull/71949. [#72049](https://github.com/ClickHouse/ClickHouse/pull/72049) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#72095](https://github.com/ClickHouse/ClickHouse/issues/72095): Minor improvement for system.query_metric_log stateless test. [#72076](https://github.com/ClickHouse/ClickHouse/pull/72076) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#72184](https://github.com/ClickHouse/ClickHouse/issues/72184): Add google-cloud-cpp submodule. [#72092](https://github.com/ClickHouse/ClickHouse/pull/72092) ([Pablo Marcos](https://github.com/pamarcos)).

View File

@ -89,7 +89,7 @@ sidebar_label: 2024
* Restore mode that replaces all external table engines and functions to Null (`restore_replace_external_engines_to_null`, `restore_replace_external_table_functions_to_null` settings) was failing if table had SETTINGS. Now it removes settings from table definition in this case and allows to restore such tables. [#69253](https://github.com/ClickHouse/ClickHouse/pull/69253) ([Ilya Yatsishin](https://github.com/qoega)).
* Reduce memory usage of inserts to JSON by using adaptive write buffer size. A lot of files created by JSON column in wide part contains small amount of data and it doesn't make sense to allocate 1MB buffer for them. [#69272](https://github.com/ClickHouse/ClickHouse/pull/69272) ([Pavel Kruglov](https://github.com/Avogar)).
* CLICKHOUSE_PASSWORD is escaped for XML in clickhouse image's entrypoint. [#69301](https://github.com/ClickHouse/ClickHouse/pull/69301) ([aohoyd](https://github.com/aohoyd)).
* Added user-level settings `min_free_disk_bytes_to_throw_insert` and `min_free_disk_ratio_to_throw_insert` to prevent insertions on disks that are almost full. [#69376](https://github.com/ClickHouse/ClickHouse/pull/69376) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Added user-level settings `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` to prevent insertions on disks that are almost full. [#69376](https://github.com/ClickHouse/ClickHouse/pull/69376) ([Marco Vilas Boas](https://github.com/marco-vb)).
* Not retaining thread in concurrent hash join threadpool to avoid query excessively spawn threads. [#69406](https://github.com/ClickHouse/ClickHouse/pull/69406) ([Duc Canh Le](https://github.com/canhld94)).
* Allow empty arguments for arrayZip/arrayZipUnaligned, as concat did in https://github.com/ClickHouse/ClickHouse/pull/65887. This is for Spark compatibility in the Gluten CH backend. [#69576](https://github.com/ClickHouse/ClickHouse/pull/69576) ([李扬](https://github.com/taiyang-li)).
* Support more advanced SSL options for Keeper's internal communication (e.g. private keys with passphrase). [#69582](https://github.com/ClickHouse/ClickHouse/pull/69582) ([Antonio Andelic](https://github.com/antonio2368)).
@ -199,7 +199,7 @@ sidebar_label: 2024
* NO CL ENTRY: 'Revert "Fix prewhere without columns and without adaptive index granularity (almost w/o anything)"'. [#68897](https://github.com/ClickHouse/ClickHouse/pull/68897) ([Alexander Gololobov](https://github.com/davenger)).
* NO CL ENTRY: 'Revert "Speed up some Kafka tests with multiprocessing"'. [#69356](https://github.com/ClickHouse/ClickHouse/pull/69356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* NO CL ENTRY: 'Revert "Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. V"'. [#69393](https://github.com/ClickHouse/ClickHouse/pull/69393) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Add user-level settings min_free_diskspace_bytes_to_throw_insert and min_free_diskspace_ratio_to_throw_insert"'. [#69705](https://github.com/ClickHouse/ClickHouse/pull/69705) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add user-level settings min_free_disk_bytes_to_perform_insert and min_free_disk_ratio_to_perform_insert"'. [#69705](https://github.com/ClickHouse/ClickHouse/pull/69705) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Support more oss endpoints"'. [#69779](https://github.com/ClickHouse/ClickHouse/pull/69779) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT

View File

@ -70,8 +70,8 @@ enable sanitizers you must use a version that matches the exact same `std` as th
the crates):
```bash
rustup toolchain install nightly-2024-04-01
rustup default nightly-2024-04-01
rustup toolchain install nightly-2024-12-01
rustup default nightly-2024-12-01
rustup component add rust-src
```

View File

@ -6,6 +6,14 @@ sidebar_label: Iceberg
# Iceberg Table Engine
:::warning
We recommend using the [Iceberg Table Function](/docs/en/sql-reference/table-functions/iceberg.md) for working with Iceberg data in ClickHouse. The Iceberg Table Function currently provides sufficient functionality, offering a partial read-only interface for Iceberg tables.
The Iceberg Table Engine is available but may have limitations. ClickHouse wasn't originally designed to support tables with externally changing schemas, which can affect the functionality of the Iceberg Table Engine. As a result, some features that work with regular tables may be unavailable or may not function correctly, especially when using the old analyzer.
For optimal compatibility, we suggest using the Iceberg Table Function while we continue to improve support for the Iceberg Table Engine.
:::
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, HDFS and locally stored tables.
## Create Table
@ -63,6 +71,16 @@ CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table
Table engine `Iceberg` is now an alias for `IcebergS3`.
**Schema Evolution**
At the moment, ClickHouse can read Iceberg tables whose schema has changed over time. We currently support reading tables where columns have been added or removed and where their order has changed. You can also change a column from requiring a value to allowing NULL. Additionally, we support permitted type casting for simple types, namely:
* int -> long
* float -> double
* decimal(P, S) -> decimal(P', S) where P' > P.
Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
To read a table whose schema has changed after its creation with dynamic schema inference, set `allow_dynamic_metadata_for_data_lakes = true` when creating the table.
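For illustration, a sketch with a placeholder path and credentials (not a working endpoint):

```sql
CREATE TABLE iceberg_table
    ENGINE = IcebergS3('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'access_key', 'secret_key')
    SETTINGS allow_dynamic_metadata_for_data_lakes = 1;
```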
### Data cache {#data-cache}
`Iceberg` table engine and table function support data caching same as `S3`, `AzureBlobStorage`, `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).

View File

@ -12,7 +12,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec
``` sql
CREATE TABLE s3_queue_engine_table (name String, value UInt32)
ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression])
ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression], [headers])
[SETTINGS]
[mode = '',]
[after_processing = 'keep',]

View File

@ -177,7 +177,7 @@ CREATE TABLE table_name
CounterID UInt32,
UserID UInt32,
ver UInt16
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

View File

@ -36,6 +36,8 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
- Requires `max_rows_to_keep`
- `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e. a circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block.
- Default value: `0`
- `compress` - Whether to compress data in memory.
- Default value: `false`
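A minimal sketch combining the settings above into a bounded, compressed in-memory table (the table name and values are illustrative):

```sql
CREATE TABLE memory_ring (event String) ENGINE = Memory
SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000, compress = true;
```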
## Usage {#usage}

View File

@ -998,46 +998,6 @@ WHERE
);
```
::::note
As of October 2024, the query is extremely slow due to missing join predicate pushdown. Corresponding issue: https://github.com/ClickHouse/ClickHouse/issues/70802
This alternative formulation works and was verified to return the reference results.
```sql
SELECT
sum(l_extendedprice * (1 - l_discount)) AS revenue
FROM
lineitem,
part
WHERE
p_partkey = l_partkey
AND l_shipinstruct = 'DELIVER IN PERSON'
AND l_shipmode IN ('AIR', 'AIR REG')
AND (
(
p_brand = 'Brand#12'
AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND l_quantity >= 1 AND l_quantity <= 1 + 10
AND p_size BETWEEN 1 AND 5
)
OR
(
p_brand = 'Brand#23'
AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND l_quantity >= 10 AND l_quantity <= 10 + 10
AND p_size BETWEEN 1 AND 10
)
OR
(
p_brand = 'Brand#34'
AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND l_quantity >= 20 AND l_quantity <= 20 + 10
AND p_size BETWEEN 1 AND 15
)
)
```
::::
**Q20**
```sql

View File

@ -1078,7 +1078,7 @@ Default value: throw
## min_free_disk_bytes_to_perform_insert
The minimum number of bytes that should be free in disk space in order to insert data. If the number of available free bytes is less than `min_free_disk_bytes_to_throw_insert` then an exception is thrown and the insert is not executed. Note that this setting:
The minimum number of bytes that should be free in disk space in order to insert data. If the number of available free bytes is less than `min_free_disk_bytes_to_perform_insert` then an exception is thrown and the insert is not executed. Note that this setting:
- takes into account the `keep_free_space_bytes` setting.
- does not take into account the amount of data that will be written by the `INSERT` operation.
- is only checked if a positive (non-zero) number of bytes is specified
@ -1091,7 +1091,7 @@ Default value: 0 bytes.
Note that if both `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` are specified, ClickHouse uses the value that requires the larger amount of free disk space to perform inserts.
## min_free_disk_ratio_to_perform_insert
The minimum ratio of free disk space to total disk space required to perform an `INSERT`. Must be a floating point value between 0 and 1. Note that this setting:
- takes into account the `keep_free_space_bytes` setting.
@ -1106,6 +1106,31 @@ Default value: 0.0
Note that if both `min_free_disk_ratio_to_perform_insert` and `min_free_disk_bytes_to_perform_insert` are specified, ClickHouse uses the value that requires the larger amount of free disk space to perform inserts.
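As a sketch, both settings can be applied at the session level before inserting (the table name is hypothetical):

```sql
SET min_free_disk_bytes_to_perform_insert = 1073741824;  -- require at least 1 GiB free
SET min_free_disk_ratio_to_perform_insert = 0.1;         -- and at least 10% of the disk free
INSERT INTO my_table VALUES (1);  -- throws if the stricter of the two thresholds is violated
```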
## allow_experimental_reverse_key
Enables support for descending sort order in MergeTree sorting keys. This setting is particularly useful for time series analysis and Top-N queries, allowing data to be stored in reverse chronological order to optimize query performance.
With `allow_experimental_reverse_key` enabled, you can define descending sort orders within the `ORDER BY` clause of a MergeTree table. This enables the use of more efficient `ReadInOrder` optimizations instead of `ReadInReverseOrder` for descending queries.
**Example**
```sql
CREATE TABLE example
(
time DateTime,
key Int32,
value String
) ENGINE = MergeTree
ORDER BY (time DESC, key) -- Descending order on 'time' field
SETTINGS allow_experimental_reverse_key = 1;
SELECT * FROM example WHERE key = 42 ORDER BY time DESC LIMIT 10;
```
By using `ORDER BY time DESC` in the query, `ReadInOrder` is applied.
**Default Value:** false
## cache_populated_by_fetch
A Cloud only setting.
@ -1114,4 +1139,4 @@ When `cache_populated_by_fetch` is disabled (the default setting), new data part
If enabled, `cache_populated_by_fetch` will instead cause all nodes to load new data parts from storage into their cache without requiring a query to trigger such an action.
Default value: 0.

View File

@ -7,6 +7,7 @@ Contains information about setting changes in previous ClickHouse versions.
Columns:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) - The settings type: `Core` (general / query settings), `MergeTree`.
- `version` ([String](../../sql-reference/data-types/string.md)) — The ClickHouse version in which settings were changed.
- `changes` ([Array](../../sql-reference/data-types/array.md) of [Tuple](../../sql-reference/data-types/tuple.md)) — A description of the setting changes: (setting name, previous value, new value, reason for the change).
@ -22,6 +23,7 @@ FORMAT Vertical
``` text
Row 1:
──────
type: Core
version: 23.5
changes: [('input_format_parquet_preserve_order','1','0','Allow Parquet reader to reorder rows for better parallelism.'),('parallelize_output_from_storages','0','1','Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows.'),('use_with_fill_by_sorting_prefix','0','1','Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently'),('output_format_parquet_compliant_nested_types','0','1','Change an internal field name in output Parquet file schema.')]
```

View File

@ -15,12 +15,19 @@ groupConcat[(delimiter [, limit])](expression);
**Arguments**
- `expression` — The expression or column name that outputs strings to be concatenated..
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string or delimiter from parameters if not specified.
- `expression` — The expression or column name that outputs strings to be concatenated.
**Parameters**
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
:::note
If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit.
Also, if different delimiters are specified as a parameter and as an argument, only the delimiter from the arguments will be used.
:::
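A short illustrative query (the expected result assumes the semantics described above):

```sql
-- delimiter ', ' and limit 3 as parameters; numbers(5) yields 0..4
SELECT groupConcat(', ', 3)(toString(number)) FROM numbers(5);  -- expected: '0, 1, 2'
```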
**Returned value**

View File

@ -73,6 +73,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json
Using CAST from `Tuple`:
```sql
SET enable_named_columns_in_function_tuple = 1;
SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json;
```
@ -97,8 +98,9 @@ SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json;
Using CAST from deprecated `Object('json')`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
SET allow_experimental_object_type = 1;
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐

View File

@ -770,7 +770,8 @@ i
## indexOf(arr, x)
Returns the index of the first x element (starting from 1) if it is in the array, or 0 if it is not.
Returns the index of the first element with value x (starting from 1) if it is in the array.
If the array does not contain the searched-for value, the function returns 0.
Example:
@ -786,6 +787,26 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
## indexOfAssumeSorted(arr, x)
Returns the index of the first element with value x (starting from 1) if it is in the array.
If the array does not contain the searched-for value, the function returns 0.
Assumes that the array is sorted in ascending order (i.e., the function uses binary search).
If the array is not sorted, results are undefined.
If the internal array is of type Nullable, the function `indexOf` is used instead.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│ 5 │
└───────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...)
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array.
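For example:

```sql
SELECT arrayCount(x -> x > 2, [1, 2, 3, 4]);  -- returns 2 (elements 3 and 4 match)
SELECT arrayCount([0, 1, 2, 0]);              -- returns 2 (two non-zero elements)
```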
@ -2157,6 +2178,41 @@ Result:
└───────────────────────────────────────────────┘
```
## arrayPrAUC
Calculates the AUC (Area Under the Curve) for the Precision-Recall curve.
**Syntax**
``` sql
arrayPrAUC(arr_scores, arr_labels)
```
**Arguments**
- `arr_scores` — the scores the prediction model gives.
- `arr_labels` — the labels of the samples, usually 1 for a positive sample and 0 for a negative sample.
**Returned value**
Returns the PR-AUC value with type Float64.
**Example**
Query:
``` sql
SELECT arrayPrAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
```
Result:
``` text
┌─arrayPrAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.8333333333333333 │
└─────────────────────────────────────────────────┘
```
## arrayMap(func, arr1, ...)
Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements.
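For example:

```sql
SELECT arrayMap(x -> x * 2, [1, 2, 3]);              -- returns [2, 4, 6]
SELECT arrayMap((x, y) -> x + y, [1, 2], [10, 20]);  -- returns [11, 22]
```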
@ -3159,4 +3215,4 @@ Result:
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -195,7 +195,7 @@ makeDateTime64(year, month, day, hour, minute, second[, precision])
**Returned value**
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**

View File

@ -253,7 +253,11 @@ SELECT format('{} {}', 'Hello', 'World')
## translate
Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings.
`from` and `to` must be constant ASCII strings.
If `from` and `to` have equal sizes, each occurrence of the 1st character of `from` in `s` is replaced by the 1st character of `to`, the 2nd character of `from` in `s` is replaced by the 2nd character of `to`, etc.
If `from` contains more characters than `to`, all occurrences of the characters at the end of `from` that have no corresponding character in `to` are deleted from `s`.
Non-ASCII characters in `s` are not modified by the function.
**Syntax**
@ -275,6 +279,20 @@ Result:
└───────────────┘
```
`from` and `to` arguments have different lengths:
``` sql
SELECT translate('clickhouse', 'clickhouse', 'CLICK') AS res
```
Result:
``` text
┌─res───┐
│ CLICK │
└───────┘
```
## translateUTF8
Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings.

View File

@ -36,8 +36,8 @@ Alias:
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -203,7 +203,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -238,7 +238,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -272,7 +272,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -308,7 +308,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -346,7 +346,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -380,7 +380,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -414,7 +414,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -450,7 +450,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -487,7 +487,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -520,7 +520,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -553,7 +553,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -588,7 +588,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -625,7 +625,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -659,7 +659,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -693,7 +693,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -729,7 +729,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -1414,8 +1414,8 @@ countSubstrings(haystack, needle[, start_pos])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1461,8 +1461,8 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1513,8 +1513,8 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
**Arguments**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` — Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1565,7 +1565,7 @@ countMatches(haystack, pattern)
**Arguments**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — The string to search in. [String](../data-types/string.md).
- `pattern` — The regular expression with [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md).
**Returned value**
@ -1610,7 +1610,7 @@ countMatchesCaseInsensitive(haystack, pattern)
**Arguments**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — The string to search in. [String](../data-types/string.md).
- `pattern` — The regular expression with [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md).
**Returned value**
@ -1647,8 +1647,8 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
**Arguments**
- `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the regexp pattern will be matched. [String](../data-types/string.md).
- `pattern` — String, regexp expression, must be constant. [String](../data-types/string.md).
- `index` — An integer greater than or equal to 0, with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional.
**Returned value**
@ -1687,8 +1687,8 @@ hasSubsequence(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Subsequence to be searched. [String](../data-types/string.md).
**Returned value**
@ -1722,8 +1722,8 @@ hasSubsequenceCaseInsensitive(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Subsequence to be searched. [String](../data-types/string.md).
**Returned value**
@ -1757,8 +1757,8 @@ hasSubsequenceUTF8(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../data-types/string.md).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../data-types/string.md).
**Returned value**
@ -1792,8 +1792,8 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../data-types/string.md).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../data-types/string.md).
**Returned value**
@ -1827,7 +1827,7 @@ hasToken(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
@ -1862,12 +1862,12 @@ hasTokenOrNull(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill-formed.
**Implementation details**
@ -1899,7 +1899,7 @@ hasTokenCaseInsensitive(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
@ -1934,7 +1934,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**

View File

@ -122,7 +122,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
For example: `SELECT toInt8(128) == -128;`.
:::
@ -370,7 +370,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
For example: `SELECT toInt16(32768) == -32768;`.
:::
@ -618,7 +618,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
For example: `SELECT toInt32(2147483648) == -2147483648;`
:::
@ -865,7 +865,7 @@ Unsupported types:
:::note
If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
For example: `SELECT toInt64(9223372036854775808) == -9223372036854775808;`
:::
@ -1608,7 +1608,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
For example: `SELECT toUInt8(256) == 0;`.
:::
@ -1856,7 +1856,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
For example: `SELECT toUInt16(65536) == 0;`.
:::
@ -2104,7 +2104,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
For example: `SELECT toUInt32(4294967296) == 0;`
:::
@ -2353,7 +2353,7 @@ Unsupported types:
:::note
If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
For example: `SELECT toUInt64(18446744073709551616) == 0;`
:::
@ -3691,8 +3691,8 @@ toDateTime(expr[, time_zone ])
- `time_zone` — Time zone. [String](../data-types/string.md).
:::note
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed.
:::
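As a hedged illustration of the rule above (the timestamp `1733935988` and the `'UTC'` timezone are arbitrary choices): a number and a long numeric string are both taken as Unix timestamps, while a short string such as `'1999'` would be rejected as ambiguous.

```sql
SELECT
    toDateTime(1733935988, 'UTC') AS from_number,
    toDateTime('1733935988', 'UTC') AS from_numeric_string,
    toDateTime('2024-12-11 16:53:08', 'UTC') AS from_date_string;
```

All three expressions evaluate to `2024-12-11 16:53:08`.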
@ -5536,7 +5536,7 @@ Result:
## reinterpretAsUInt256
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
@ -5612,7 +5612,7 @@ Result:
## reinterpretAsInt16
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
@ -7189,6 +7189,45 @@ Result:
└───────────────────────┘
```
## toUnixTimestamp64Second
Converts a `DateTime64` to an `Int64` value with fixed second precision. The input value is scaled up or down appropriately depending on its precision.
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
:::
**Syntax**
```sql
toUnixTimestamp64Second(value)
```
**Arguments**
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
**Example**
Query:
```sql
WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64
SELECT toUnixTimestamp64Second(dt64);
```
Result:
```response
┌─toUnixTimestamp64Second(dt64)─┐
│ 1234567891 │
└───────────────────────────────┘
```
## toUnixTimestamp64Milli
Converts a `DateTime64` to an `Int64` value with fixed millisecond precision. The input value is scaled up or down appropriately depending on its precision.
@ -7306,6 +7345,48 @@ Result:
└─────────────────────────────┘
```
## fromUnixTimestamp64Second
Converts an `Int64` to a `DateTime64` value with fixed second precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
:::note
Please note that the input value is treated as a UTC timestamp, not as a timestamp in the given (or implicit) timezone.
:::
**Syntax**
``` sql
fromUnixTimestamp64Second(value[, timezone])
```
**Arguments**
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
**Returned value**
- `value` converted to DateTime64 with precision `0`. [DateTime64](../data-types/datetime64.md).
**Example**
Query:
``` sql
WITH CAST(1733935988, 'Int64') AS i64
SELECT
fromUnixTimestamp64Second(i64, 'UTC') AS x,
toTypeName(x);
```
Result:
```response
┌───────────────────x─┬─toTypeName(x)────────┐
│ 2024-12-11 16:53:08 │ DateTime64(0, 'UTC') │
└─────────────────────┴──────────────────────┘
```
## fromUnixTimestamp64Milli
Converts an `Int64` to a `DateTime64` value with fixed millisecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
@ -7334,7 +7415,7 @@ fromUnixTimestamp64Milli(value[, timezone])
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
WITH CAST(1733935988123, 'Int64') AS i64
SELECT
fromUnixTimestamp64Milli(i64, 'UTC') AS x,
toTypeName(x);
@ -7344,7 +7425,7 @@ Result:
```response
┌───────────────────────x─┬─toTypeName(x)────────┐
│ 2009-02-13 23:31:31.011 │ DateTime64(3, 'UTC') │
│ 2024-12-11 16:53:08.123 │ DateTime64(3, 'UTC') │
└─────────────────────────┴──────────────────────┘
```
@ -7376,7 +7457,7 @@ fromUnixTimestamp64Micro(value[, timezone])
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
WITH CAST(1733935988123456, 'Int64') AS i64
SELECT
fromUnixTimestamp64Micro(i64, 'UTC') AS x,
toTypeName(x);
@ -7386,7 +7467,7 @@ Result:
```response
┌──────────────────────────x─┬─toTypeName(x)────────┐
│ 1970-01-15 06:56:07.891011 │ DateTime64(6, 'UTC') │
│ 2024-12-11 16:53:08.123456 │ DateTime64(6, 'UTC') │
└────────────────────────────┴──────────────────────┘
```
@ -7418,7 +7499,7 @@ fromUnixTimestamp64Nano(value[, timezone])
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
WITH CAST(1733935988123456789, 'Int64') AS i64
SELECT
fromUnixTimestamp64Nano(i64, 'UTC') AS x,
toTypeName(x);
@ -7428,7 +7509,7 @@ Result:
```response
┌─────────────────────────────x─┬─toTypeName(x)────────┐
│ 1970-01-01 00:20:34.567891011 │ DateTime64(9, 'UTC') │
│ 2024-12-11 16:53:08.123456789 │ DateTime64(9, 'UTC') │
└───────────────────────────────┴──────────────────────┘
```

View File

@ -13,5 +13,10 @@ Syntax:
``` sql
ALTER ROLE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
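A hedged usage sketch of the new clauses (the role `accountant` and the profile `'default'` are hypothetical names):

```sql
ALTER ROLE accountant
    ADD SETTINGS max_memory_usage = 10000000000 MAX 20000000000 WRITABLE
    ADD PROFILES 'default';

ALTER ROLE accountant DROP SETTINGS max_memory_usage;
```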

View File

@ -13,6 +13,11 @@ Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
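A hedged sketch of the extended syntax (the profile names `restricted` and `'legacy'` are hypothetical):

```sql
ALTER SETTINGS PROFILE restricted
    MODIFY SETTINGS readonly = 1 CONST
    DROP PROFILES 'legacy';
```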

View File

@ -18,7 +18,12 @@ ALTER USER [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [=value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE|CONST|CHANGEABLE_IN_READONLY] [,...] ]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
To use `ALTER USER` you must have the [ALTER USER](../../../sql-reference/statements/grant.md#access-management) privilege.
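A hedged sketch of the incremental form (the user `john` and the profile `'readonly'` are hypothetical); unlike the old `SETTINGS` clause, `ADD`/`DROP` modify the existing list instead of replacing it wholesale:

```sql
ALTER USER john
    ADD SETTINGS max_threads = 8 MAX 16 READONLY
    ADD PROFILES 'readonly';

ALTER USER john DROP ALL SETTINGS;
```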

View File

@ -154,16 +154,17 @@ This feature is deprecated and will be removed in the future.
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
## Refreshable Materialized View [Experimental] {#refreshable-materialized-view}
## Refreshable Materialized View {#refreshable-materialized-view}
```sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
REFRESH EVERY|AFTER interval [OFFSET interval]
RANDOMIZE FOR interval
DEPENDS ON [db.]name [, [db.]name [, ...]]
SETTINGS name = value [, name = value [, ...]]
[RANDOMIZE FOR interval]
[DEPENDS ON [db.]name [, [db.]name [, ...]]]
[SETTINGS name = value [, name = value [, ...]]]
[APPEND]
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
[TO[db.]name] [(columns)] [ENGINE = engine]
[EMPTY]
AS SELECT ...
[COMMENT 'comment']
```
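A hedged sketch combining the optional clauses above (the names `stats.daily_totals` and `events` are hypothetical):

```sql
CREATE MATERIALIZED VIEW IF NOT EXISTS stats.daily_totals
REFRESH EVERY 1 DAY OFFSET 2 HOUR
RANDOMIZE FOR 15 MINUTE
ENGINE = MergeTree ORDER BY day
AS SELECT toDate(event_time) AS day, count() AS hits
FROM events
GROUP BY day;
```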
@ -281,7 +282,7 @@ This replaces *all* refresh parameters at once: schedule, dependencies, settings
The status of all refreshable materialized views is available in table [`system.view_refreshes`](../../../operations/system-tables/view_refreshes.md). In particular, it contains refresh progress (if running), last and next refresh times, and the exception message if a refresh failed.
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views).
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|WAIT|CANCEL VIEW`](../system.md#refreshable-materialized-views).
To wait for a refresh to complete, use [`SYSTEM WAIT VIEW`](../system.md#refreshable-materialized-views). This is particularly useful for waiting for the initial refresh after creating a view.
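For example (a hedged sketch, reusing the hypothetical `stats.daily_totals` from above):

```sql
SYSTEM REFRESH VIEW stats.daily_totals;
SYSTEM WAIT VIEW stats.daily_totals;

SELECT view, status, last_success_time, next_refresh_time
FROM system.view_refreshes;
```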

View File

@ -605,6 +605,10 @@ Allows using a specified table engine when creating a table. Applies to [table e
Grants all the privileges on a regulated entity to a user account or a role.
:::note
The privilege `ALL` is not supported in ClickHouse Cloud, where the `default` user has limited permissions. Users can grant the maximum permissions to a user by granting the `default_role`. See [here](/docs/en/cloud/security/cloud-access-management#initial-settings) for further details.
:::
### NONE
Doesn't grant any privileges.

View File

@ -39,7 +39,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML
- `hmac_key` and `hmac_secret` — Keys that specify credentials to use with given endpoint. Optional.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension.
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:

View File

@ -65,6 +65,14 @@ SELECT * FROM icebergS3(iceberg_conf, filename = 'test_table')
DESCRIBE icebergS3(iceberg_conf, filename = 'test_table')
```
**Schema Evolution**
At the moment, ClickHouse can read Iceberg tables whose schema has changed over time. We currently support reading tables where columns have been added and removed and their order has changed. You can also change a column that requires a value into one that allows NULL. Additionally, we support permitted type casting for simple types, namely:
* int -> long
* float -> double
* decimal(P, S) -> decimal(P', S) where P' > P.
Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
**Aliases**
The table function `iceberg` is now an alias for `icebergS3`.
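A hedged sketch (the endpoint and credentials are placeholders); both spellings read the same table:

```sql
SELECT * FROM icebergS3('https://s3.example.com/bucket/path/to/table', 'KEY', 'SECRET');
SELECT * FROM iceberg('https://s3.example.com/bucket/path/to/table', 'KEY', 'SECRET');
```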

View File

@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
**Syntax**
``` sql
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method])
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method] [,headers])
s3(named_collection[, option=value [,..]])
```
@ -43,7 +43,8 @@ For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id`
- `session_token` - Session token to use with the given keys. Optional when passing keys.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension.
- `headers` - Parameter is optional. Allows headers to be passed in the S3 request. Pass in the format `headers(key=value)` e.g. `headers('x-amz-request-payer' = 'requester')`.
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
@ -313,6 +314,25 @@ SET use_hive_partitioning = 1;
SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Accessing requester-pays buckets
To access a requester-pays bucket, the header `x-amz-request-payer = requester` must be passed with every request. This is achieved by passing the parameter `headers('x-amz-request-payer' = 'requester')` to the s3 function. For example:
```sql
SELECT
count() AS num_rows,
uniqExact(_file) AS num_files
FROM s3('https://coiled-datasets-rp.s3.us-east-1.amazonaws.com/1trc/measurements-100*.parquet', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', headers('x-amz-request-payer' = 'requester'))
┌───num_rows─┬─num_files─┐
│ 1110000000 │       111 │
└────────────┴───────────┘
1 row in set. Elapsed: 3.089 sec. Processed 1.09 billion rows, 0.00 B (353.55 million rows/s., 0.00 B/s.)
Peak memory usage: 192.27 KiB.
```
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.

View File

@ -24,7 +24,7 @@ s3Cluster(cluster_name, named_collection[, option=value [,..]])
- `session_token` - Session token to use with the given keys. Optional when passing keys.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip` or `gz`, `brotli` or `br`, `xz` or `LZMA`, `zstd` or `zst`. By default, it will autodetect compression method by file extension.
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:

View File

@ -67,8 +67,8 @@ As with C++ dependencies, ClickHouse uses vendoring to
If you plan to enable sanitizers, you must use a version that matches the same `std` as the one used in CI (we vendor the crates):
```bash
rustup toolchain install nightly-2024-04-01
rustup default nightly-2024-04-01
rustup toolchain install nightly-2024-12-01
rustup default nightly-2024-12-01
rustup component add rust-src
```

View File

@ -175,7 +175,7 @@ CREATE TABLE table_name
CounterID UInt32,
UserID UInt32,
ver UInt16
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

View File

@ -1058,7 +1058,7 @@ Improves the LZ4 or ZSTD compression ratio by 20-40% on average.
## min_free_disk_bytes_to_perform_insert
The minimum number of bytes of disk space that must be free in order to insert data. If the number of available bytes is less than `min_free_disk_bytes_to_throw_insert`, an exception is thrown and the insert is not performed. This setting:
The minimum number of bytes of disk space that must be free in order to insert data. If the number of available bytes is less than `min_free_disk_bytes_to_perform_insert`, an exception is thrown and the insert is not performed. This setting:
- takes the `keep_free_space_bytes` setting into account.
- does not take into account the amount of data that will be written by the `INSERT` operation.
- is only checked if a positive (non-zero) number of bytes is specified.
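A hedged sketch of applying the setting per query (the table `t` and the 10 GiB threshold are arbitrary):

```sql
INSERT INTO t
SETTINGS min_free_disk_bytes_to_perform_insert = 10737418240
VALUES (1);
```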

View File

@ -785,6 +785,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
## indexOfAssumeSorted(arr, x)
Returns the index of the first element `x` (starting from 1) if it is present in the array, or 0 if it is not.
The function should be used with an array sorted in ascending order, since binary search is used.
If the inner array type is Nullable, the `indexOf` function is used.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                              5 │
└────────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...)
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array.

View File

@ -306,6 +306,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
└───────────────────────────────────┘
```
## indexOfAssumeSorted(arr, x)
Returns the index of the first element `x` (starting from 1) if it is present in the array, or 0 if it is not.
The function should be used if the array is sorted in non-decreasing order, since binary search is used.
If the inner type is Nullable, the `indexOf` function is used.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                              5 │
└────────────────────────────────────────────────┘
```
Elements equal to `NULL` are handled as normal values.
## arrayCount(\[func,\] arr1, ...) {#array-count}

View File

@ -13,5 +13,10 @@ sidebar_label: ROLE
``` sql
ALTER ROLE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```

View File

@ -13,6 +13,11 @@ sidebar_label: SETTINGS PROFILE
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```

View File

@ -19,7 +19,12 @@ ALTER USER [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [=value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE|CONST|CHANGEABLE_IN_READONLY] [,...] ]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
To execute `ALTER USER`, the [ALTER USER](../grant.md#grant-access-management) privilege is required.

View File

@ -337,6 +337,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
## indexOfAssumeSorted(arr, x)
Returns the index of the first element `x` (starting from 1) in the array, or 0 if `x` is not present.
The function should be used with an array sorted in ascending order, since binary search is used.
If the inner array type is Nullable, the `indexOf` function is used.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                              5 │
└────────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...) {#array-count}
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array.

View File

@ -119,20 +119,26 @@ macro(clickhouse_program_add name)
clickhouse_program_add_library(${name})
endmacro()
add_subdirectory (server)
add_subdirectory (client)
add_subdirectory (local)
add_subdirectory (benchmark)
add_subdirectory (extract-from-config)
add_subdirectory (compressor)
add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
add_subdirectory (git-import)
add_subdirectory (bash-completion)
add_subdirectory (benchmark)
add_subdirectory (check-marks)
add_subdirectory (checksum-for-compressed-block)
add_subdirectory (client)
add_subdirectory (compressor)
add_subdirectory (disks)
add_subdirectory (extract-from-config)
add_subdirectory (format)
add_subdirectory (git-import)
add_subdirectory (install)
add_subdirectory (keeper-bench)
add_subdirectory (keeper-data-dumper)
add_subdirectory (local)
add_subdirectory (obfuscator)
add_subdirectory (server)
add_subdirectory (static-files-disk-uploader)
add_subdirectory (su)
add_subdirectory (disks)
add_subdirectory (zookeeper-dump-tree)
add_subdirectory (zookeeper-remove-by-list)
if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
add_subdirectory (keeper-converter)
@ -198,18 +204,30 @@ if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
list(APPEND CLICKHOUSE_BUNDLE self-extracting)
endif ()
clickhouse_program_install(clickhouse-server server)
clickhouse_program_install(clickhouse-client client chc)
clickhouse_program_install(clickhouse-local local chl ch)
clickhouse_program_install(clickhouse-benchmark benchmark)
clickhouse_program_install(clickhouse-extract-from-config extract-from-config)
clickhouse_program_install(clickhouse-check-marks check-marks)
clickhouse_program_install(clickhouse-checksum-for-compressed-block checksum-for-compressed-block)
clickhouse_program_install(clickhouse-client client chc)
clickhouse_program_install(clickhouse-compressor compressor)
clickhouse_program_install(clickhouse-format format)
clickhouse_program_install(clickhouse-obfuscator obfuscator)
clickhouse_program_install(clickhouse-git-import git-import)
clickhouse_program_install(clickhouse-static-files-disk-uploader static-files-disk-uploader)
clickhouse_program_install(clickhouse-disks disks)
clickhouse_program_install(clickhouse-extract-from-config extract-from-config)
clickhouse_program_install(clickhouse-format format)
clickhouse_program_install(clickhouse-git-import git-import)
clickhouse_program_install(clickhouse-local local chl ch)
clickhouse_program_install(clickhouse-obfuscator obfuscator)
clickhouse_program_install(clickhouse-server server)
clickhouse_program_install(clickhouse-static-files-disk-uploader static-files-disk-uploader)
clickhouse_program_install(clickhouse-su su)
clickhouse_program_install(clickhouse-zookeeper-dump-tree zookeeper-dump-tree)
clickhouse_program_install(clickhouse-zookeeper-remove-by-list zookeeper-remove-by-list)
if (TARGET ch_contrib::nuraft)
clickhouse_program_install(clickhouse-keeper-data-dumper keeper-data-dumper)
endif ()
if (TARGET ch_contrib::rapidjson AND TARGET ch_contrib::nuraft)
clickhouse_program_install(clickhouse-keeper-bench keeper-bench)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER)
if (NOT BUILD_STANDALONE_KEEPER AND CREATE_KEEPER_SYMLINK)

View File

@ -0,0 +1,11 @@
set (CLICKHOUSE_CHECK_MARKS_SOURCES CheckMarks.cpp)
set (CLICKHOUSE_CHECK_MARKS_LINK
PRIVATE
boost::program_options
clickhouse_aggregate_functions
clickhouse_common_config
dbms
)
clickhouse_program_add(check-marks)

View File

@ -54,7 +54,7 @@ static void checkByCompressedReadBuffer(const std::string & mrk_path, const std:
}
int main(int argc, char ** argv)
int mainEntryClickHouseCheckMarks(int argc, char ** argv)
{
boost::program_options::options_description desc("Allowed options");
desc.add_options()

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseCheckMarks(int argc, char ** argv);

View File

@ -0,0 +1,9 @@
set (CLICKHOUSE_CHECKSUM_FOR_COMPRESSED_BLOCK_SOURCES ChecksumForCompressedBlock.cpp)
set (CLICKHOUSE_CHECKSUM_FOR_COMPRESSED_BLOCK_LINK
PRIVATE
clickhouse_functions
dbms
)
clickhouse_program_add(checksum-for-compressed-block)

View File

@ -34,7 +34,7 @@ std::string flipBit(std::string s, size_t pos)
}
int main(int, char **)
int mainEntryClickHouseChecksumForCompressedBlock(int, char **)
{
using namespace DB;
ReadBufferFromFileDescriptor in(STDIN_FILENO);

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseChecksumForCompressedBlock(int, char **);

View File

@ -35,7 +35,7 @@ public:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path);
}
disk.getDisk()->removeRecursive(path);
disk.getDisk()->removeRecursiveWithLimit(path);
}
else if (disk.getDisk()->existsFile(path))
{

View File

@ -277,10 +277,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
WriteBufferFromOwnString str_buf;
bool oneline_current_query = oneline || approx_query_length < max_line_length;
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
IAST::FormatSettings settings(oneline_current_query, hilite);
settings.show_secrets = true;
settings.print_pretty_type_names = !oneline_current_query;
res->format(settings);
res->format(str_buf, settings);
if (insert_query_payload)
{
@ -324,10 +324,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
WriteBufferFromOwnString str_buf;
bool oneline_current_query = oneline || approx_query_length < max_line_length;
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
IAST::FormatSettings settings(oneline_current_query, hilite);
settings.show_secrets = true;
settings.print_pretty_type_names = !oneline_current_query;
res->format(settings);
res->format(str_buf, settings);
auto res_string = str_buf.str();
WriteBufferFromOStream res_cout(std::cout, 4096);

View File

@ -0,0 +1,20 @@
if (NOT TARGET ch_contrib::rapidjson OR NOT TARGET ch_contrib::nuraft)
message (WARNING "Not building keeper-bench due to rapidjson or nuraft is disabled")
return()
endif ()
set (CLICKHOUSE_KEEPER_BENCH_SOURCES
Generator.cpp
Runner.cpp
Stats.cpp
KeeperBench.cpp
)
set (CLICKHOUSE_KEEPER_BENCH_LINK
PRIVATE
dbms
clickhouse_functions
ch_contrib::rapidjson
)
clickhouse_program_add(keeper-bench)

View File

@ -20,7 +20,7 @@ std::optional<T> valueToOptional(const boost::program_options::variable_value &
}
int main(int argc, char *argv[])
int mainEntryClickHouseKeeperBench(int argc, char ** argv)
{
bool print_stacktrace = true;

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseKeeperBench(int argc, char ** argv);

View File

@ -0,0 +1,14 @@
if (NOT TARGET ch_contrib::nuraft)
message (WARNING "Not building keeper-data-dumper due to nuraft is disabled")
return ()
endif ()
set (CLICKHOUSE_KEEPER_DATA_DUMPER_SOURCES KeeperDataDumper.cpp)
set (CLICKHOUSE_KEEPER_DATA_DUMPER_LINK
PRIVATE
clickhouse_functions
dbms
)
clickhouse_program_add(keeper-data-dumper)

View File

@ -54,7 +54,7 @@ void dumpMachine(std::shared_ptr<KeeperStateMachine<DB::KeeperMemoryStorage>> ma
std::cout << std::flush;
}
int main(int argc, char *argv[])
int mainEntryClickHouseKeeperDataDumper(int argc, char ** argv)
{
if (argc != 3)
{

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseKeeperDataDumper(int argc, char ** argv);

View File

@ -20,18 +20,22 @@
#include <vector>
/// Universal executable for various clickhouse applications
int mainEntryClickHouseServer(int argc, char ** argv);
int mainEntryClickHouseClient(int argc, char ** argv);
int mainEntryClickHouseLocal(int argc, char ** argv);
int mainEntryClickHouseBenchmark(int argc, char ** argv);
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
int mainEntryClickHouseCheckMarks(int argc, char ** argv);
int mainEntryClickHouseChecksumForCompressedBlock(int, char **);
int mainEntryClickHouseClient(int argc, char ** argv);
int mainEntryClickHouseCompressor(int argc, char ** argv);
int mainEntryClickHouseFormat(int argc, char ** argv);
int mainEntryClickHouseObfuscator(int argc, char ** argv);
int mainEntryClickHouseGitImport(int argc, char ** argv);
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
int mainEntryClickHouseSU(int argc, char ** argv);
int mainEntryClickHouseDisks(int argc, char ** argv);
int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
int mainEntryClickHouseFormat(int argc, char ** argv);
int mainEntryClickHouseGitImport(int argc, char ** argv);
int mainEntryClickHouseLocal(int argc, char ** argv);
int mainEntryClickHouseObfuscator(int argc, char ** argv);
int mainEntryClickHouseSU(int argc, char ** argv);
int mainEntryClickHouseServer(int argc, char ** argv);
int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
int mainEntryClickHouseZooKeeperDumpTree(int argc, char ** argv);
int mainEntryClickHouseZooKeeperRemoveByList(int argc, char ** argv);
int mainEntryClickHouseHashBinary(int, char **)
{
@ -50,6 +54,12 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
#if USE_RAPIDJSON && USE_NURAFT
int mainEntryClickHouseKeeperBench(int argc, char ** argv);
#endif
#if USE_NURAFT
int mainEntryClickHouseKeeperDataDumper(int argc, char ** argv);
#endif
// install
int mainEntryClickHouseInstall(int argc, char ** argv);
@ -63,7 +73,10 @@ namespace
using MainFunc = int (*)(int, char**);
/// Add an item here to register new application
/// Add an item here to register new application.
/// This list has a "priority" - e.g. we need to disambiguate clickhouse --format being
/// either clickhouse-format or clickhouse-{local, client} --format.
/// Currently we will prefer the latter option.
std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{
{"local", mainEntryClickHouseLocal},
@ -79,6 +92,10 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
{"su", mainEntryClickHouseSU},
{"hash-binary", mainEntryClickHouseHashBinary},
{"disks", mainEntryClickHouseDisks},
{"check-marks", mainEntryClickHouseCheckMarks},
{"checksum-for-compressed-block", mainEntryClickHouseChecksumForCompressedBlock},
{"zookeeper-dump-tree", mainEntryClickHouseZooKeeperDumpTree},
{"zookeeper-remove-by-list", mainEntryClickHouseZooKeeperRemoveByList},
// keeper
#if ENABLE_CLICKHOUSE_KEEPER
@ -90,7 +107,12 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
{"keeper-client", mainEntryClickHouseKeeperClient},
#endif
#if USE_RAPIDJSON && USE_NURAFT
{"keeper-bench", mainEntryClickHouseKeeperBench},
#endif
#if USE_NURAFT
{"keeper-data-dumper", mainEntryClickHouseKeeperDataDumper},
#endif
// install
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},

View File

@ -15,6 +15,7 @@
#include <Common/logger_useful.h>
#include <base/phdr_cache.h>
#include <Common/ErrorHandlers.h>
#include <Processors/QueryPlan/QueryPlanStepRegistry.h>
#include <base/getMemoryAmount.h>
#include <base/getAvailableMemoryAmount.h>
#include <base/errnoToString.h>
@ -59,6 +60,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/SharedThreadPools.h>
#include <IO/UseSSL.h>
#include <Interpreters/CancellationChecker.h>
#include <Interpreters/ServerAsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
#include <Interpreters/DNSCacheUpdater.h>
@ -295,6 +297,7 @@ namespace CurrentMetrics
extern const Metric MergesMutationsMemoryTracking;
extern const Metric MaxDDLEntryID;
extern const Metric MaxPushedDDLEntryID;
extern const Metric StartupScriptsExecutionState;
}
namespace ProfileEvents
@ -365,6 +368,14 @@ namespace ErrorCodes
}
enum StartupScriptsExecutionState : CurrentMetrics::Value
{
NotFinished = 0,
Success = 1,
Failure = 2,
};
static std::string getCanonicalPath(std::string && path)
{
Poco::trimInPlace(path);
@ -781,9 +792,12 @@ void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, Contex
startup_context->makeQueryContext();
executeQuery(read_buffer, write_buffer, true, startup_context, callback, QueryFlags{ .internal = true }, std::nullopt, {});
}
CurrentMetrics::set(CurrentMetrics::StartupScriptsExecutionState, StartupScriptsExecutionState::Success);
}
catch (...)
{
CurrentMetrics::set(CurrentMetrics::StartupScriptsExecutionState, StartupScriptsExecutionState::Failure);
tryLogCurrentException(log, "Failed to parse startup scripts file");
}
}
@ -924,6 +938,8 @@ try
registerRemoteFileMetadatas();
registerSchedulerNodes();
QueryPlanStepRegistry::registerPlanSteps();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
@ -1377,6 +1393,23 @@ try
setOOMScore(oom_score, log);
#endif
std::unique_ptr<DB::BackgroundSchedulePoolTaskHolder> cancellation_task;
SCOPE_EXIT({
if (cancellation_task)
CancellationChecker::getInstance().terminateThread();
});
if (server_settings[ServerSetting::background_schedule_pool_size] > 1)
{
auto cancellation_task_holder = global_context->getSchedulePool().createTask(
"CancellationChecker",
[] { CancellationChecker::getInstance().workerFunction(); }
);
cancellation_task = std::make_unique<DB::BackgroundSchedulePoolTaskHolder>(std::move(cancellation_task_holder));
(*cancellation_task)->activateAndSchedule();
}
global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());

View File

@ -0,0 +1,10 @@
set (CLICKHOUSE_ZOOKEEPER_DUMP_TREE_SOURCES ZooKeeperDumpTree.cpp)
set (CLICKHOUSE_ZOOKEEPER_DUMP_TREE_LINK
PRIVATE
dbms
clickhouse_functions
boost::program_options
)
clickhouse_program_add(zookeeper-dump-tree)

View File

@ -6,7 +6,7 @@
#include <Common/ZooKeeper/KeeperException.h>
int main(int argc, char ** argv)
int mainEntryClickHouseZooKeeperDumpTree(int argc, char ** argv)
{
try
{

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseZooKeeperDumpTree(int argc, char ** argv);

View File

@ -0,0 +1,10 @@
set (CLICKHOUSE_ZOOKEEPER_REMOVE_BY_LIST_SOURCES ZooKeeperRemoveByList.cpp)
set (CLICKHOUSE_ZOOKEEPER_REMOVE_BY_LIST_LINK
PRIVATE
dbms
clickhouse_functions
boost::program_options
)
clickhouse_program_add(zookeeper-remove-by-list)

View File

@ -5,7 +5,7 @@
#include <boost/program_options.hpp>
int main(int argc, char ** argv)
int mainEntryClickHouseZooKeeperRemoveByList(int argc, char ** argv)
try
{
boost::program_options::options_description desc("Allowed options");

View File

@ -0,0 +1 @@
extern int mainEntryClickHouseZooKeeperRemoveByList(int argc, char ** argv);

View File

@ -1,7 +1,7 @@
As we have multiple projects, we use a workspace to manage them (it's way simpler and leads to fewer issues). In order
to vendor all the dependencies we need to store both the registry and the packages themselves.
Note that this includes the exact `std` dependencies for the rustc version used in CI (currently nightly-2024-04-01),
Note that this includes the exact `std` dependencies for the rustc version used in CI (currently nightly-2024-12-01),
so you need to install `rustup component add rust-src` for the specific version.
* First step: (Re)-generate the Cargo.lock file (run under `workspace/`).
@ -16,7 +16,7 @@ Note that we use both commands to vendor both registry and crates. No idea why b
* First we need to install the tool if you don't already have it:
```bash
cargo install --version 0.2.6 cargo-local-registry
cargo install --version 0.2.7 cargo-local-registry
```
* Now add the local packages:
@ -28,9 +28,9 @@ export RUSTC_ROOT=$(rustc --print=sysroot)
cd "$CH_TOP_DIR"/rust/workspace
cargo local-registry --git --sync Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/library/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/
cargo local-registry --no-delete --git --sync "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/library/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/
cargo local-registry --no-delete --git --sync "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cargo vendor --no-delete --locked "$CH_TOP_DIR"/contrib/rust_vendor

File diff suppressed because it is too large

View File

@ -5,7 +5,7 @@ version = "0.1.0"
[dependencies]
anstream = {version = "0.6.12"}
prqlc = {version = "0.11.3", default-features = false}
prqlc = {version = "0.13.2", default-features = false}
serde_json = "1.0"
[lib]

View File

@ -36,6 +36,7 @@ pub unsafe extern "C" fn prql_to_sql_impl(
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
display: prqlc::DisplayOptions::Plain,
};
if let Ok(sql_str) = prqlc::compile(&query_str, &opts) {

View File

@ -6,9 +6,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
skim = { version = "0.10.2", default-features = false }
skim = { version = "0.15.5", default-features = false, features = ["cli"] }
cxx = "1.0.83"
term = "0.7.0"
term = "1.0.0"
clap = "4.5.22"
[build-dependencies]
cxx-build = "1.0.83"

View File

@ -44,15 +44,15 @@ fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String,
}
let options = SkimOptionsBuilder::default()
.height(Some("30%"))
.query(Some(prefix.to_str().unwrap()))
.height("30%".to_string())
.query(Some(prefix.to_str().unwrap().to_string()))
.tac(true)
// Do not clear on start and clear on exit will clear skim output from the terminal.
//
// Refs: https://github.com/lotabout/skim/issues/494#issuecomment-1776565846
.no_clear_start(true)
.no_clear(false)
.tiebreak(Some("-score".to_string()))
.tiebreak(vec![RankCriteria::NegScore])
// Exact mode performs better for SQL.
//
// Default fuzzy search is too smart for SQL, it even takes into account the case, which

View File

@ -45,7 +45,7 @@ namespace
roles_info.names_of_roles[role_id] = role->getName();
roles_info.access.makeUnion(role->access);
roles_info.settings_from_enabled_roles.merge(role->settings);
roles_info.settings_from_enabled_roles.merge(role->settings, /* normalize= */ false);
for (const auto & granted_role : role->granted_roles.getGranted())
collectRoles(roles_info, skip_ids, get_role_function, granted_role, false, false);

View File

@ -137,6 +137,13 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const AlterSettingsProfileElements & profile_elements, SettingSource source) const
{
check(current_settings, profile_elements.add_settings, source);
check(current_settings, profile_elements.modify_settings, source);
/// We don't check `drop_settings` here.
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const
{
for (const auto & element : profile_elements)

View File

@ -74,10 +74,11 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const SettingChange & change, SettingSource source) const;
void check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const AlterSettingsProfileElements & profile_elements, SettingSource source) const;
/// Checks whether `change` violates these constraints and throws an exception if so. (setting short name is expected inside `changes`)
void check(const MergeTreeSettings & current_settings, const SettingChange & change) const;

View File

@ -9,6 +9,8 @@
#include <IO/WriteHelpers.h>
#include <Parsers/Access/ASTSettingsProfileElement.h>
#include <base/removeDuplicates.h>
#include <boost/container/flat_map.hpp>
#include <boost/container/flat_set.hpp>
namespace DB
@ -19,6 +21,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
init(ast, nullptr);
@ -116,16 +119,20 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toASTWithName
}
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast)
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, bool normalize_)
{
for (const auto & ast_element : ast.elements)
emplace_back(*ast_element);
if (normalize_)
normalize();
}
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control)
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control, bool normalize_)
{
for (const auto & ast_element : ast.elements)
emplace_back(*ast_element, access_control);
if (normalize_)
normalize();
}
@ -133,7 +140,11 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toAST() con
{
auto res = std::make_shared<ASTSettingsProfileElements>();
for (const auto & element : *this)
res->elements.push_back(element.toAST());
{
auto element_ast = element.toAST();
if (!element_ast->empty())
res->elements.push_back(element_ast);
}
return res;
}
@ -141,7 +152,11 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toASTWithNa
{
auto res = std::make_shared<ASTSettingsProfileElements>();
for (const auto & element : *this)
res->elements.push_back(element.toASTWithNames(access_control));
{
auto element_ast = element.toASTWithNames(access_control);
if (!element_ast->empty())
res->elements.push_back(element_ast);
}
return res;
}
@ -220,9 +235,11 @@ void SettingsProfileElements::removeSettingsKeepProfiles()
}
void SettingsProfileElements::merge(const SettingsProfileElements & other)
void SettingsProfileElements::merge(const SettingsProfileElements & other, bool normalize_)
{
insert(end(), other.begin(), other.end());
if (normalize_)
normalize();
}
@ -280,6 +297,81 @@ std::vector<UUID> SettingsProfileElements::toProfileIDs() const
return res;
}
void SettingsProfileElements::normalize()
{
/// Ensure that each element represents either a setting or a profile.
{
SettingsProfileElements new_elements;
for (auto & element : *this)
{
if (element.parent_profile && !element.setting_name.empty())
{
SettingsProfileElement new_element;
new_element.parent_profile = element.parent_profile;
element.parent_profile.reset();
new_elements.push_back(std::move(new_element));
}
}
insert(end(), new_elements.begin(), new_elements.end());
}
/// Partitioning: first profiles, then settings.
/// We use std::stable_partition() here because we want to preserve the relative order of profiles and the relative order of settings.
/// (We need that order to be preserved to remove duplicates correctly - see below.)
auto profiles_begin = begin();
auto profiles_end = std::stable_partition(begin(), end(), [](const SettingsProfileElement & element) { return static_cast<bool>(element.parent_profile); });
auto settings_begin = profiles_end;
auto settings_end = end();
/// Remove duplicates among profiles.
/// We keep the last position of any used profile.
/// It's important to keep exactly the last position (and not just any position) because profiles can override settings from each other.
/// For example, [pr_A, pr_B, pr_A, pr_C] is always the same as [pr_B, pr_A, pr_C], but may not be the same as [pr_A, pr_B, pr_C]
/// if pr_A and pr_B give different values to same settings.
{
boost::container::flat_set<UUID> profile_ids;
profile_ids.reserve(profiles_end - profiles_begin);
auto it = profiles_end;
while (it != profiles_begin)
{
--it;
auto & element = *it;
if (element.parent_profile && !profile_ids.emplace(*element.parent_profile).second)
element.parent_profile.reset();
}
}
/// Remove duplicates among settings.
/// We keep the first position of any used setting, and merge settings with the same name to that first element.
{
boost::container::flat_map<std::string_view, SettingsProfileElements::iterator> setting_name_to_first_encounter;
setting_name_to_first_encounter.reserve(settings_end - settings_begin);
for (auto it = settings_begin; it != settings_end; ++it)
{
auto & element = *it;
auto first = setting_name_to_first_encounter.emplace(element.setting_name, it).first->second;
if (it != first)
{
auto & first_element = *first;
if (element.value)
first_element.value = element.value;
if (element.min_value)
first_element.min_value = element.min_value;
if (element.max_value)
first_element.max_value = element.max_value;
if (element.writability)
first_element.writability = element.writability;
element.setting_name.clear();
}
}
}
/// Remove empty elements.
std::erase_if(*this, [](const SettingsProfileElement & element) { return element.empty(); });
}
bool SettingsProfileElements::isBackupAllowed() const
{
for (const auto & setting : *this)
@ -296,4 +388,139 @@ bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name)
return Settings::resolveName(setting_name) == ALLOW_BACKUP_SETTING_NAME;
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const SettingsProfileElements & ast)
{
drop_all_settings = true;
drop_all_profiles = true;
add_settings = ast;
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTSettingsProfileElements & ast)
: AlterSettingsProfileElements(SettingsProfileElements{ast})
{
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control)
: AlterSettingsProfileElements(SettingsProfileElements{ast, access_control})
{
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast)
{
drop_all_settings = ast.drop_all_settings;
drop_all_profiles = ast.drop_all_profiles;
if (ast.add_settings)
add_settings = SettingsProfileElements{*ast.add_settings, /* normalize= */ false}; /// For "ALTER" the normalization is unnecessary.
if (ast.modify_settings)
modify_settings = SettingsProfileElements{*ast.modify_settings, /* normalize= */ false};
if (ast.drop_settings)
drop_settings = SettingsProfileElements{*ast.drop_settings, /* normalize= */ false};
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast, const AccessControl & access_control)
{
drop_all_settings = ast.drop_all_settings;
drop_all_profiles = ast.drop_all_profiles;
if (ast.add_settings)
add_settings = SettingsProfileElements{*ast.add_settings, access_control, /* normalize= */ false}; /// For "ALTER" the normalization is unnecessary.
if (ast.modify_settings)
modify_settings = SettingsProfileElements{*ast.modify_settings, access_control, /* normalize= */ false};
if (ast.drop_settings)
drop_settings = SettingsProfileElements{*ast.drop_settings, access_control, /* normalize= */ false};
}
void SettingsProfileElements::applyChanges(const AlterSettingsProfileElements & changes)
{
/// Apply "DROP" changes.
if (changes.drop_all_profiles)
{
for (auto & element : *this)
element.parent_profile.reset(); /// We only make this element empty; the element will be removed in normalize().
}
if (changes.drop_all_settings)
{
for (auto & element : *this)
element.setting_name.clear(); /// We only make this element empty; the element will be removed in normalize().
}
auto apply_drop_setting = [&](const String & setting_name)
{
for (auto & element : *this)
{
if (element.setting_name == setting_name)
element.setting_name.clear();
}
};
auto apply_drop_profile = [&](const UUID & profile_id)
{
for (auto & element : *this)
{
if (element.parent_profile == profile_id)
element.parent_profile.reset();
}
};
for (const auto & drop : changes.drop_settings)
{
if (drop.parent_profile)
apply_drop_profile(*drop.parent_profile);
if (!drop.setting_name.empty())
apply_drop_setting(drop.setting_name);
}
auto apply_modify_setting = [&](const SettingsProfileElement & modify)
{
SettingsProfileElement new_element;
new_element.setting_name = modify.setting_name;
new_element.value = modify.value;
new_element.min_value = modify.min_value;
new_element.max_value = modify.max_value;
new_element.writability = modify.writability;
push_back(new_element); /// normalize() will merge this new element with the previous elements.
};
/// Apply "ADD" changes.
auto apply_add_setting = [&](const SettingsProfileElement & add)
{
/// "ADD SETTING" must replace the value and the constraints of a setting, so first we need drop the previous elements for that setting.
apply_drop_setting(add.setting_name);
apply_modify_setting(add);
};
auto apply_add_profile = [&](const UUID & profile_id)
{
SettingsProfileElement new_element;
new_element.parent_profile = profile_id;
push_back(new_element); /// We don't care about possible duplicates here, normalize() will remove duplicates.
};
for (const auto & add : changes.add_settings)
{
if (add.parent_profile)
apply_add_profile(*add.parent_profile);
if (!add.setting_name.empty())
apply_add_setting(add);
}
/// Apply "MODIFY" changes.
for (const auto & modify : changes.modify_settings)
{
chassert(!modify.parent_profile); /// There is no such thing as "MODIFY PROFILE".
if (!modify.setting_name.empty())
apply_modify_setting(modify);
}
/// Remove empty elements and duplicates, and sort the result.
normalize();
}
}

View File

@ -13,8 +13,10 @@ namespace DB
struct Settings;
class SettingsChanges;
class SettingsConstraints;
struct AlterSettingsProfileElements;
class ASTSettingsProfileElement;
class ASTSettingsProfileElements;
class ASTAlterSettingsProfileElements;
class AccessControl;
@ -44,6 +46,8 @@ struct SettingsProfileElement
std::shared_ptr<ASTSettingsProfileElement> toAST() const;
std::shared_ptr<ASTSettingsProfileElement> toASTWithNames(const AccessControl & access_control) const;
bool empty() const { return !parent_profile && (setting_name.empty() || (!value && !min_value && !max_value && !writability)); }
bool isConstraint() const;
private:
@ -57,8 +61,9 @@ public:
SettingsProfileElements() = default;
/// The constructor from AST requires the AccessControl if `ast.id_mode == false`.
SettingsProfileElements(const ASTSettingsProfileElements & ast); /// NOLINT
SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control);
SettingsProfileElements(const ASTSettingsProfileElements & ast, bool normalize_ = true); /// NOLINT
SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control, bool normalize_ = true);
std::shared_ptr<ASTSettingsProfileElements> toAST() const;
std::shared_ptr<ASTSettingsProfileElements> toASTWithNames(const AccessControl & access_control) const;
@ -70,16 +75,41 @@ public:
void removeSettingsKeepProfiles();
void merge(const SettingsProfileElements & other);
Settings toSettings() const;
SettingsChanges toSettingsChanges() const;
SettingsConstraints toSettingsConstraints(const AccessControl & access_control) const;
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
/// Normalizes this list of profile elements: removes duplicates and empty elements, and also sorts the elements
/// in the following order: first profiles, then settings.
/// The function is called automatically after parsing profile elements from an AST and
/// at the end of an "ALTER PROFILE (USER/ROLE)" command.
void normalize();
/// Appends all the elements of another list of profile elements to this list.
void merge(const SettingsProfileElements & other, bool normalize_ = true);
/// Applies changes from an "ALTER PROFILE (USER/ROLE)" command. Always normalizes the result.
void applyChanges(const AlterSettingsProfileElements & changes);
bool isBackupAllowed() const;
static bool isAllowBackupSetting(const String & setting_name);
};
struct AlterSettingsProfileElements
{
bool drop_all_settings = false;
bool drop_all_profiles = false;
SettingsProfileElements add_settings;
SettingsProfileElements modify_settings;
SettingsProfileElements drop_settings;
AlterSettingsProfileElements() = default;
explicit AlterSettingsProfileElements(const SettingsProfileElements & ast);
explicit AlterSettingsProfileElements(const ASTSettingsProfileElements & ast);
explicit AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast);
AlterSettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control);
AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast, const AccessControl & access_control);
};
}

View File

@ -135,8 +135,8 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
merged_settings.emplace_back(new_element);
}
merged_settings.merge(enabled.params.settings_from_enabled_roles);
merged_settings.merge(enabled.params.settings_from_user);
merged_settings.merge(enabled.params.settings_from_enabled_roles, /* normalize= */ false);
merged_settings.merge(enabled.params.settings_from_user, /* normalize= */ false);
auto info = std::make_shared<SettingsProfilesInfo>(access_control);

View File

@ -10,7 +10,6 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionSum.h>
#include <Core/DecimalFunctions.h>
#include <Core/IResolvedFunction.h>
#include "config.h"
@ -141,6 +140,9 @@ public:
bool isCompilable() const override
{
if constexpr (!canBeNativeType<Numerator>() || !canBeNativeType<Denominator>())
return false;
bool can_be_compiled = true;
for (const auto & argument : this->argument_types)
@ -158,7 +160,8 @@ public:
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(Fraction), llvm::assumeAligned(this->alignOfData()));
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
void compileMergeImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -185,7 +188,15 @@ public:
b.CreateStore(denominator_result_value, denominator_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
void
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileMergeImpl(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
llvm::Value * compileGetResultImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -204,6 +215,13 @@ public:
return b.CreateFDiv(double_numerator, double_denominator);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
return compileGetResultImpl(builder, aggregate_data_ptr);
return nullptr;
}
#endif
private:
@ -308,7 +326,8 @@ public:
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
void compileAddImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -327,6 +346,12 @@ public:
b.CreateStore(denominator_value_updated, denominator_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileAddImpl(builder, aggregate_data_ptr, arguments);
}
#endif
private:
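The pattern in this hunk and the preceding ones is the same: the JIT body moves into a non-virtual compile*Impl method carrying a requires-clause, and the unconstrained virtual override dispatches to it through if constexpr. This is necessary because the override must exist for every instantiation of the class template, including ones (non-native Numerator/Denominator) where the body would not compile. A self-contained sketch of the idiom, not ClickHouse code:

#include <iostream>
#include <string>
#include <type_traits>

template <typename T>
constexpr bool canBeNativeType() { return std::is_arithmetic_v<T>; }

struct ICompilable
{
    virtual ~ICompilable() = default;
    virtual void compileAdd() const = 0;
};

template <typename T>
struct Compilable : ICompilable
{
    // The constrained body: only callable when T is JIT-compatible.
    // A virtual function cannot carry this requires-clause itself.
    void compileAddImpl() const
        requires(canBeNativeType<T>())
    {
        std::cout << "emitting native IR\n";
    }

    void compileAdd() const override
    {
        if constexpr (canBeNativeType<T>())
            compileAddImpl(); // constrained body is reachable here
        // otherwise: the override still exists but compiles to a no-op
    }
};

int main()
{
    Compilable<int> native;          // compileAddImpl is valid for int
    Compilable<std::string> generic; // compileAddImpl never instantiated
    native.compileAdd();
    generic.compileAdd();
}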

View File

@ -59,13 +59,13 @@ public:
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
if constexpr (!canBeNativeType<Weight>() || !canBeNativeType<Numerator>() || !canBeNativeType<Denominator>())
return false;
return Base::isCompilable();
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
void compileAddImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const
requires(canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -94,6 +94,26 @@ public:
b.CreateStore(denominator_value_updated, denominator_ptr);
}
void
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
Base::compileMergeImpl(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
return Base::compileGetResultImpl(builder, aggregate_data_ptr);
return nullptr;
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileAddImpl(builder, aggregate_data_ptr, arguments);
}
#endif
};
@ -104,7 +124,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
constexpr auto allow = [](WhichDataType t)
{
return t.isInt() || t.isUInt() || t.isFloat();
return t.isInt() || t.isUInt() || t.isNativeFloat();
};
return allow(l_dt) && allow(r_dt);
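The only change to allowTypes is tightening isFloat() to isNativeFloat(), which presumably restricts the accepted arguments to machine-native Float32/Float64 while excluding extended float types such as the recently added BFloat16. A hypothetical illustration of the distinction (enum values invented for the example):

// Invented TypeIndex values; only the shape of the predicate is the point.
enum class TypeIndex { Int32, UInt64, Float32, Float64, BFloat16 };

constexpr bool isNativeFloat(TypeIndex t)
{
    return t == TypeIndex::Float32 || t == TypeIndex::Float64;
}

constexpr bool isFloat(TypeIndex t)
{
    return isNativeFloat(t) || t == TypeIndex::BFloat16;
}

// BFloat16 passes the old check but not the narrowed one.
static_assert(isFloat(TypeIndex::BFloat16) && !isNativeFloat(TypeIndex::BFloat16));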

View File

@ -1,12 +1,13 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTFunction.h>
#include <Common/CurrentThread.h>
#include <Core/Settings.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -349,4 +350,9 @@ AggregateFunctionFactory & AggregateFunctionFactory::instance()
return ret;
}
bool AggregateUtils::isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
}

View File

@ -1,7 +1,6 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>
@ -23,6 +22,8 @@ class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
using DataTypes = std::vector<DataTypePtr>;
class ASTFunction;
/**
* The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
* Parameters are for "parametric" aggregate functions.
@ -114,10 +115,7 @@ private:
struct AggregateUtils
{
static bool isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
static bool isAggregateFunction(const ASTFunction & node);
};
const String & getAggregateFunctionCanonicalNameIfAny(const String & name);
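The factory .cpp hunk earlier and this header hunk are two halves of one refactoring: isAggregateFunction used to be defined inline here, which forced the header to pull in Parsers/ASTFunction.h; with the body moved to the .cpp, a forward declaration of ASTFunction is enough because the function only takes a reference. A generic sketch of the technique, with hypothetical file names:

// --- some_utils.h (hypothetical) ---
class ASTFunction; // forward declaration replaces the heavy include

struct AggregateUtils
{
    // Only declared here: a reference parameter needs no complete type.
    static bool isAggregateFunction(const ASTFunction & node);
};

// --- some_utils.cpp (hypothetical) ---
// #include "some_utils.h"
// #include <Parsers/ASTFunction.h> // complete type paid for once, here
//
// bool AggregateUtils::isAggregateFunction(const ASTFunction & node)
// {
//     return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
// }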

View File

@ -1,27 +1,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <AggregateFunctions/AggregateFunctionGroupConcat.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
@ -33,209 +13,190 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
namespace
void GroupConcatDataBase::checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
struct GroupConcatDataBase
void GroupConcatDataBase::insertChar(const char * str, UInt64 str_size, Arena * arena)
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
void checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
void GroupConcatDataBase::insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, FormatSettings{});
auto string = buff.stringView();
insertChar(string.data(), string.size(), arena);
}
void insertChar(const char * str, UInt64 str_size, Arena * arena)
{
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
UInt64 GroupConcatData::getSize(size_t i) const
{
return offsets[i * 2 + 1] - offsets[i * 2];
}
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, FormatSettings{});
auto string = buff.stringView();
insertChar(string.data(), string.size(), arena);
}
UInt64 GroupConcatData::getString(size_t i) const
{
return offsets[i * 2];
}
};
void GroupConcatData::insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
template <bool has_limit>
struct GroupConcatData;
template<>
struct GroupConcatData<false> final : public GroupConcatDataBase
GroupConcatImpl<has_limit>::GroupConcatImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, limit(limit_)
, delimiter(delimiter_)
, type(data_type_)
{
};
template<>
struct GroupConcatData<true> final : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
UInt64 getString(size_t i) const { return offsets[i * 2]; }
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
};
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
}
template <bool has_limit>
class GroupConcatImpl final
: public IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>
String GroupConcatImpl<has_limit>::getName() const
{
static constexpr auto name = "groupConcat";
return name;
}
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
const DataTypePtr type;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, limit(limit_)
, delimiter(delimiter_)
, type(data_type_)
{
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::add(
AggregateDataPtr __restrict place,
const IColumn ** columns,
size_t row_num,
Arena * arena) const
{
auto & cur_data = this->data(place);
String getName() const override { return name; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_data = this->data(place);
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
if (isFixedString(type))
{
ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"};
const auto & col_str = castColumn(col, std::make_shared<DataTypeString>());
cur_data.insert(col_str.get(), serialization, row_num, arena);
}
else
cur_data.insert(columns[0], serialization, row_num, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
else
if (isFixedString(type))
{
ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"};
const auto & col_str = castColumn(col, std::make_shared<DataTypeString>());
cur_data.insert(col_str.get(), serialization, row_num, arena);
}
else
cur_data.insert(columns[0], serialization, row_num, arena);
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
else
{
auto & cur_data = this->data(place);
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
writeVarUInt(cur_data.data_size, buf);
buf.write(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
writeVarUInt(cur_data.num_rows, buf);
for (const auto & offset : cur_data.offsets)
writeVarUInt(offset, buf);
}
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
template <bool has_limit>
void GroupConcatImpl<has_limit>::serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const
{
auto & cur_data = this->data(place);
writeVarUInt(cur_data.data_size, buf);
buf.write(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
auto & cur_data = this->data(place);
UInt64 temp_size = 0;
readVarUInt(temp_size, buf);
cur_data.checkAndUpdateSize(temp_size, arena);
buf.readStrict(cur_data.data + cur_data.data_size, temp_size);
cur_data.data_size = temp_size;
if constexpr (has_limit)
{
readVarUInt(cur_data.num_rows, buf);
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
for (auto & offset : cur_data.offsets)
readVarUInt(offset, buf);
}
writeVarUInt(cur_data.num_rows, buf);
for (const auto & offset : cur_data.offsets)
writeVarUInt(offset, buf);
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
template <bool has_limit>
void GroupConcatImpl<has_limit>::deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const
{
auto & cur_data = this->data(place);
UInt64 temp_size = 0;
readVarUInt(temp_size, buf);
cur_data.checkAndUpdateSize(temp_size, arena);
buf.readStrict(cur_data.data + cur_data.data_size, temp_size);
cur_data.data_size = temp_size;
if constexpr (has_limit)
{
auto & cur_data = this->data(place);
readVarUInt(cur_data.num_rows, buf);
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
for (auto & offset : cur_data.offsets)
readVarUInt(offset, buf);
}
}
if (cur_data.data_size == 0)
{
to.insertDefault();
return;
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const
{
auto & cur_data = this->data(place);
auto & column_string = assert_cast<ColumnString &>(to);
column_string.insertData(cur_data.data, cur_data.data_size);
if (cur_data.data_size == 0)
{
to.insertDefault();
return;
}
bool allocatesMemoryInArena() const override { return true; }
};
auto & column_string = assert_cast<ColumnString &>(to);
column_string.insertData(cur_data.data, cur_data.data_size);
}
template <bool has_limit>
bool GroupConcatImpl<has_limit>::allocatesMemoryInArena() const { return true; }
AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
@ -278,14 +239,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}
}
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias(GroupConcatImpl<false>::getNameAndAliases().at(1), GroupConcatImpl<false>::getNameAndAliases().at(0), AggregateFunctionFactory::Case::Insensitive);
}
}
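One detail worth noting in checkAndUpdateSize above: growth is amortized doubling, allocated_size = max(2 * allocated_size, data_size + add), so a long run of appends performs O(1) amortized copying. A minimal sketch with plain realloc standing in for the Arena allocator:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>

struct Buffer
{
    char * data = nullptr;
    uint64_t data_size = 0;
    uint64_t allocated_size = 0;

    // Same growth policy as above; std::realloc stands in for Arena::realloc.
    void checkAndUpdateSize(uint64_t add)
    {
        if (data_size + add >= allocated_size)
        {
            allocated_size = std::max(2 * allocated_size, data_size + add);
            data = static_cast<char *>(std::realloc(data, allocated_size));
        }
    }

    void append(const char * str, uint64_t n)
    {
        checkAndUpdateSize(n);
        std::memcpy(data + data_size, str, n);
        data_size += n;
    }
};

int main()
{
    Buffer b;
    b.append("hello, ", 7);
    b.append("world", 5); // only O(log n) reallocations over n appends
    std::free(b.data);
}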

View File

@ -0,0 +1,78 @@
#pragma once
#ifndef DB_GROUP_CONCAT_H
#define DB_GROUP_CONCAT_H
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Core/ServerSettings.h>
#include <Common/ArenaAllocator.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
namespace DB
{
struct Settings;
struct GroupConcatDataBase
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
void checkAndUpdateSize(UInt64 add, Arena * arena);
void insertChar(const char * str, UInt64 str_size, Arena * arena);
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena);
};
struct GroupConcatData : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const;
UInt64 getString(size_t i) const;
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena);
};
template <bool has_limit>
class GroupConcatImpl : public IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>
{
static constexpr auto name = "groupConcat";
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
const DataTypePtr type;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_);
String getName() const override;
static const std::vector<std::string>& getNameAndAliases()
{
static const std::vector<std::string> aliases = {"groupConcat", "group_concat"};
return aliases;
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override;
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override;
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf, std::optional<size_t> version) const override;
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> version, Arena * arena) const override;
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override;
bool allocatesMemoryInArena() const override;
};
}
#endif
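To make the offsets layout declared above concrete: offsets stores a begin/end pair per row (offsets[2*i] and offsets[2*i + 1]), so getString(i) returns the begin offset and getSize(i) the byte length, which is what lets the limit-aware merge re-slice individual rows out of the flat data buffer. A toy standalone model, not the ClickHouse types:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

struct ToyGroupConcat
{
    std::string data;              // concatenated row texts, no delimiters
    std::vector<uint64_t> offsets; // begin/end pair per row
    uint64_t num_rows = 0;

    void insert(const std::string & row)
    {
        offsets.push_back(data.size()); // begin of row i
        data += row;
        offsets.push_back(data.size()); // end of row i
        ++num_rows;
    }

    uint64_t getString(size_t i) const { return offsets[i * 2]; } // begin offset
    uint64_t getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }

    std::string row(size_t i) const { return data.substr(getString(i), getSize(i)); }
};

int main()
{
    ToyGroupConcat g;
    g.insert("abc");
    g.insert("de");
    assert(g.row(0) == "abc" && g.row(1) == "de" && g.num_rows == 2);
}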

View File

@ -8,7 +8,6 @@
#include <Interpreters/AggregationCommon.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SipHash.h>
#include <IO/ReadHelpersArena.h>

Some files were not shown because too many files have changed in this diff.