Merge branch 'master' into optimize-join-performance-by-extracting-common-exprs

This commit is contained in:
Dmitry Novik 2024-12-10 15:49:16 +01:00 committed by GitHub
commit b44c891ea9
955 changed files with 18715 additions and 5437 deletions

View File

@ -66,6 +66,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
# Twice as large
set (RLIMIT_DATA 10000000000)
set (RLIMIT_AS 20000000000)
set (RLIMIT_CPU 2000)
endif()
# For some files currently building RISCV64/LOONGARCH64 might be too slow.
@ -591,6 +592,20 @@ endif()
include (cmake/sanitize_targets.cmake)
if (COMPILER_CACHE STREQUAL "chcache")
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT ON)
get_all_targets(all_targets)
set (chcache_targets _cargo-build_chcache cargo-build_chcache cargo-prebuild_chcache)
foreach(target ${all_targets})
if (target IN_LIST chcache_targets)
continue()
endif()
add_dependencies(${target} cargo-build_chcache)
endforeach()
endif()
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
if (NATIVE_BUILD_TARGETS

View File

@ -35,7 +35,7 @@ curl https://clickhouse.com/ | sh
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.10 Community Call](https://clickhouse.com/company/events/v24-10-community-release-call) - October 31
* [v24.12 Community Call](https://clickhouse.com/company/events/v24-12-community-release-call) - December 19
## Upcoming Events
@ -43,7 +43,6 @@ Keep an eye out for upcoming meetups and events around the world. Somewhere else
Upcoming meetups
* [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3
* [Stockholm Meetup](https://www.meetup.com/clickhouse-stockholm-user-group/events/304382411) - December 9
* [New York Meetup](https://www.meetup.com/clickhouse-new-york-user-group/events/304268174) - December 9
* [Kuala Lumpur Meetup](https://www.meetup.com/clickhouse-malaysia-meetup-group/events/304576472/) - December 11
@ -52,6 +51,7 @@ Upcoming meetups
Recently completed meetups
* [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - November 12

View File

@ -103,6 +103,26 @@ namespace Net
///
/// The default limit is 100.
int getNameLengthLimit() const;
/// Returns the maximum length of a field name.
///
/// See setNameLengthLimit() for more information.
void setNameLengthLimit(int limit);
/// Sets the maximum length of a field name.
///
/// The default limit is 256.
int getValueLengthLimit() const;
/// Returns the maximum length of a field value.
///
/// See setValueLengthLimit() for more information.
void setValueLengthLimit(int limit);
/// Sets the maximum length of a field value.
///
/// The default limit is 8192.
bool hasToken(const std::string & fieldName, const std::string & token) const;
/// Returns true iff the field with the given fieldName contains
/// the given token. Tokens in a header field are expected to be
@ -157,12 +177,14 @@ namespace Net
enum Limits
/// Limits for basic sanity checks when reading a header
{
MAX_NAME_LENGTH = 256,
MAX_VALUE_LENGTH = 8192,
DFL_NAME_LENGTH_LIMIT = 256,
DFL_VALUE_LENGTH_LIMIT = 8192,
DFL_FIELD_LIMIT = 100
};
int _fieldLimit;
int _nameLengthLimit;
int _valueLengthLimit;
};

View File

@ -28,14 +28,18 @@ namespace Net {
MessageHeader::MessageHeader():
_fieldLimit(DFL_FIELD_LIMIT)
_fieldLimit(DFL_FIELD_LIMIT),
_nameLengthLimit(DFL_NAME_LENGTH_LIMIT),
_valueLengthLimit(DFL_VALUE_LENGTH_LIMIT)
{
}
MessageHeader::MessageHeader(const MessageHeader& messageHeader):
NameValueCollection(messageHeader),
_fieldLimit(DFL_FIELD_LIMIT)
_fieldLimit(DFL_FIELD_LIMIT),
_nameLengthLimit(DFL_NAME_LENGTH_LIMIT),
_valueLengthLimit(DFL_VALUE_LENGTH_LIMIT)
{
}
@ -80,12 +84,12 @@ void MessageHeader::read(std::istream& istr)
throw MessageException("Too many header fields");
name.clear();
value.clear();
while (ch != eof && ch != ':' && ch != '\n' && name.length() < MAX_NAME_LENGTH) { name += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != ':' && ch != '\n' && name.length() < _nameLengthLimit) { name += ch; ch = buf.sbumpc(); }
if (ch == '\n') { ch = buf.sbumpc(); continue; } // ignore invalid header lines
if (ch != ':') throw MessageException("Field name too long/no colon found");
if (ch != eof) ch = buf.sbumpc(); // ':'
while (ch != eof && Poco::Ascii::isSpace(ch) && ch != '\r' && ch != '\n') ch = buf.sbumpc();
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < MAX_VALUE_LENGTH) { value += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < _valueLengthLimit) { value += ch; ch = buf.sbumpc(); }
if (ch == '\r') ch = buf.sbumpc();
if (ch == '\n')
ch = buf.sbumpc();
@ -93,7 +97,7 @@ void MessageHeader::read(std::istream& istr)
throw MessageException("Field value too long/no CRLF found");
while (ch == ' ' || ch == '\t') // folding
{
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < MAX_VALUE_LENGTH) { value += ch; ch = buf.sbumpc(); }
while (ch != eof && ch != '\r' && ch != '\n' && value.length() < _valueLengthLimit) { value += ch; ch = buf.sbumpc(); }
if (ch == '\r') ch = buf.sbumpc();
if (ch == '\n')
ch = buf.sbumpc();
@ -122,6 +126,32 @@ void MessageHeader::setFieldLimit(int limit)
}
int MessageHeader::getNameLengthLimit() const
{
return _nameLengthLimit;
}
void MessageHeader::setNameLengthLimit(int limit)
{
poco_assert(limit >= 0);
_nameLengthLimit = limit;
}
int MessageHeader::getValueLengthLimit() const
{
return _valueLengthLimit;
}
void MessageHeader::setValueLengthLimit(int limit)
{
poco_assert(limit >= 0);
_valueLengthLimit = limit;
}
bool MessageHeader::hasToken(const std::string& fieldName, const std::string& token) const
{
std::string field = get(fieldName, "");

View File

@ -120,6 +120,12 @@ setup_aws_credentials() {
local minio_root_user=${MINIO_ROOT_USER:-clickhouse}
local minio_root_password=${MINIO_ROOT_PASSWORD:-clickhouse}
mkdir -p ~/.aws
if [[ -f ~/.aws/credentials ]]; then
if grep -q "^\[default\]" ~/.aws/credentials; then
echo "The credentials file contains a [default] section."
return
fi
fi
cat <<EOT >> ~/.aws/credentials
[default]
aws_access_key_id=${minio_root_user}

View File

@ -9,7 +9,7 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA
return()
endif()
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (sccache, then ccache), 'ccache', 'sccache', or 'disabled'")
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (sccache, then ccache), 'ccache', 'sccache', 'chcache', or 'disabled'")
if(COMPILER_CACHE STREQUAL "auto")
find_program (CCACHE_EXECUTABLE NAMES sccache ccache)
@ -17,11 +17,25 @@ elseif (COMPILER_CACHE STREQUAL "ccache")
find_program (CCACHE_EXECUTABLE ccache)
elseif(COMPILER_CACHE STREQUAL "sccache")
find_program (CCACHE_EXECUTABLE sccache)
elseif(COMPILER_CACHE STREQUAL "chcache")
list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake")
find_package(Rust REQUIRED)
include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake")
corrosion_import_crate(
MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/chcache/Cargo.toml
PROFILE release
LOCKED
FLAGS --offline
)
set_target_properties(chcache PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/programs/)
set(CCACHE_EXECUTABLE ${CMAKE_CURRENT_BINARY_DIR}/programs/chcache)
elseif(COMPILER_CACHE STREQUAL "disabled")
message(STATUS "Using *ccache: no (disabled via configuration)")
return()
else()
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|sccache|ccache|disabled), value: '${COMPILER_CACHE}'")
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|sccache|ccache|chcache|disabled), value: '${COMPILER_CACHE}'")
endif()
@ -60,6 +74,9 @@ if (CCACHE_EXECUTABLE MATCHES "/ccache$")
elseif(CCACHE_EXECUTABLE MATCHES "/sccache$")
message(STATUS "Using sccache: ${CCACHE_EXECUTABLE}")
set(LAUNCHER ${CCACHE_EXECUTABLE})
elseif(CCACHE_EXECUTABLE MATCHES "/chcache$")
message(STATUS "Using chcache: ${CCACHE_EXECUTABLE}")
set(LAUNCHER ${CCACHE_EXECUTABLE})
endif()
set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER})

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit ce6de271811899d587fc28b500041ebcf720014f
Subproject commit c11f7fce68737cdc67a1d61678b2717d617ebb5a

contrib/aws vendored

@ -1 +1 @@
Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59
Subproject commit 71169aeec91b41c1bd5cf78fad6158dacdcde9d5

View File

@ -34,13 +34,9 @@ set(ISAL_C_SRC
${ISAL_SOURCE_DIR}/erasure_code/ec_base.c
${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c
${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c
${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c
${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c
${ISAL_SOURCE_DIR}/igzip/adler32_base.c
${ISAL_SOURCE_DIR}/igzip/encode_df.c
${ISAL_SOURCE_DIR}/igzip/flatten_ll.c
${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c
${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c
${ISAL_SOURCE_DIR}/igzip/huff_codes.c
${ISAL_SOURCE_DIR}/igzip/hufftables_c.c
${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c
@ -51,7 +47,6 @@ set(ISAL_C_SRC
${ISAL_SOURCE_DIR}/igzip/igzip.c
${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c
${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c
${ISAL_SOURCE_DIR}/programs/igzip_cli.c
${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c
${ISAL_SOURCE_DIR}/raid/raid_base.c
)

View File

@ -2,8 +2,8 @@ set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive")
set(SRCS
"${LIBRARY_DIR}/libarchive/archive_acl.c"
"${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
"${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
# "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c"
# "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c"
"${LIBRARY_DIR}/libarchive/archive_check_magic.c"
"${LIBRARY_DIR}/libarchive/archive_cmdline.c"
"${LIBRARY_DIR}/libarchive/archive_cryptor.c"
@ -37,7 +37,7 @@ set(SRCS
"${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c"
"${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
# "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c"
"${LIBRARY_DIR}/libarchive/archive_read_extract2.c"
"${LIBRARY_DIR}/libarchive/archive_read_extract.c"
"${LIBRARY_DIR}/libarchive/archive_read_open_fd.c"

View File

@ -130,7 +130,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_follower.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_readonly.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_secondary.cc
@ -139,7 +138,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/dbformat.cc
${ROCKSDB_SOURCE_DIR}/db/error_handler.cc
${ROCKSDB_SOURCE_DIR}/db/event_helpers.cc
${ROCKSDB_SOURCE_DIR}/db/experimental.cc
${ROCKSDB_SOURCE_DIR}/db/external_sst_file_ingestion_job.cc
${ROCKSDB_SOURCE_DIR}/db/file_indexer.cc
${ROCKSDB_SOURCE_DIR}/db/flush_job.cc
@ -227,7 +225,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc # Only needed on debug mode
${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
${ROCKSDB_SOURCE_DIR}/options/configurable.cc
${ROCKSDB_SOURCE_DIR}/options/customizable.cc
@ -249,8 +247,8 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/block_based/block_cache.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_footer.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/filter_block_reader_common.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/filter_policy.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/flush_block_policy.cc
@ -265,6 +263,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/block_based/reader_common.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/uncompression_dict_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_fetcher.cc
${ROCKSDB_SOURCE_DIR}/table/compaction_merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_builder.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_reader.cc
@ -272,7 +271,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/get_context.cc
${ROCKSDB_SOURCE_DIR}/table/iterator.cc
${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/compaction_merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc
${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc
@ -288,17 +286,9 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/unique_id.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc
${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/io_tracer_parser_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc # Only needed on debug mode
${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc # Only needed on debug mode
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc # Only needed on debug mode
${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc
@ -333,17 +323,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/util/xxhash.cc
${ROCKSDB_SOURCE_DIR}/utilities/agg_merge/agg_merge.cc
${ROCKSDB_SOURCE_DIR}/utilities/backup/backup_engine.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
@ -351,11 +330,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/debug.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc
${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc
@ -365,8 +340,6 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend2.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/uint64add.cc
${ROCKSDB_SOURCE_DIR}/utilities/object_registry.cc
${ROCKSDB_SOURCE_DIR}/utilities/option_change_migration/option_change_migration.cc
${ROCKSDB_SOURCE_DIR}/utilities/options/options_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_metadata.cc
@ -378,7 +351,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc # Unused but dead code elimination is hard for some linkers
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
@ -398,8 +371,8 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/types_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc # Unused but dead code elimination is hard for some linkers
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc # Unused but dead code elimination is hard for some linkers
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
@ -412,7 +385,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
build_version.cc) # generated by hand
build_version.cc) # generated by hand (Mostly from contrib/rocksdb/TARGETS)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
list(APPEND SOURCES

contrib/rust_vendor vendored

@ -1 +1 @@
Subproject commit 08e82ca6543683abe4770305ad811a942186a520
Subproject commit b25b16b0b10a1cbb33eb0922f77aeedb72119792

View File

@ -13,8 +13,8 @@ ENV CARGO_HOME=/rust/cargo
ENV PATH="/rust/cargo/bin:${PATH}"
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
chmod 777 -R /rust && \
rustup toolchain install nightly-2024-04-01 && \
rustup default nightly-2024-04-01 && \
rustup toolchain install nightly-2024-12-01 && \
rustup default nightly-2024-12-01 && \
rustup toolchain remove stable && \
rustup component add rust-src && \
rustup target add x86_64-unknown-linux-gnu && \

View File

@ -101,3 +101,4 @@ wadllib==1.3.6
websocket-client==1.8.0
wheel==0.38.1
zipp==1.0.0
jinja2==3.1.3

View File

@ -82,5 +82,5 @@ ENV MINIO_ROOT_USER="clickhouse"
ENV MINIO_ROOT_PASSWORD="clickhouse"
ENV EXPORT_S3_STORAGE_POLICIES=1
RUN npm install -g azurite@3.30.0 \
RUN npm install -g azurite@3.33.0 \
&& npm install -g tslib && npm install -g node

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.10.3.21-stable (e668b927efb) FIXME as compared to v24.10.2.80-stable (96b80057159)
#### Improvement
* Backported in [#72100](https://github.com/ClickHouse/ClickHouse/issues/72100): Fix the metadata_version record in ZooKeeper in restarting thread rather than in attach thread. [#70297](https://github.com/ClickHouse/ClickHouse/pull/70297) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#72169](https://github.com/ClickHouse/ClickHouse/issues/72169): Forbid Dynamic/Variant types in min/max functions to avoid confusion. [#71761](https://github.com/ClickHouse/ClickHouse/pull/71761) ([Pavel Kruglov](https://github.com/Avogar)).
* Backported in [#72064](https://github.com/ClickHouse/ClickHouse/issues/72064): When retrieving data directly from a dictionary using Dictionary storage, dictionary table function, or direct SELECT from the dictionary itself, it is now enough to have `SELECT` permission or `dictGet` permission for the dictionary. This aligns with previous attempts to prevent ACL bypasses: https://github.com/ClickHouse/ClickHouse/pull/57362 and https://github.com/ClickHouse/ClickHouse/pull/65359. It also makes the latter one backward compatible. [#72051](https://github.com/ClickHouse/ClickHouse/pull/72051) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#72144](https://github.com/ClickHouse/ClickHouse/issues/72144): Acquiring zero-copy shared lock before moving a part to zero-copy disk to prevent possible data loss if Keeper is unavailable. [#71845](https://github.com/ClickHouse/ClickHouse/pull/71845) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#72088](https://github.com/ClickHouse/ClickHouse/issues/72088): Fix rows_processed column in system.s3/azure_queue_log broken in 24.6. Closes [#69975](https://github.com/ClickHouse/ClickHouse/issues/69975). [#71946](https://github.com/ClickHouse/ClickHouse/pull/71946) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#72036](https://github.com/ClickHouse/ClickHouse/issues/72036): Fix `Illegal type` error for `MergeTree` tables with binary monotonic function in `ORDER BY` when the first argument is constant. Fixes [#71941](https://github.com/ClickHouse/ClickHouse/issues/71941). [#71966](https://github.com/ClickHouse/ClickHouse/pull/71966) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#72207](https://github.com/ClickHouse/ClickHouse/issues/72207): Fixed incorrect settings order `max_parser_depth` and `max_parser_backtracks`. [#71498](https://github.com/ClickHouse/ClickHouse/pull/71498) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Backported in [#72071](https://github.com/ClickHouse/ClickHouse/issues/72071): Fix client syntax highlighting that was broken in https://github.com/ClickHouse/ClickHouse/pull/71949. [#72049](https://github.com/ClickHouse/ClickHouse/pull/72049) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#72095](https://github.com/ClickHouse/ClickHouse/issues/72095): Minor improvement for system.query_metric_log stateless test. [#72076](https://github.com/ClickHouse/ClickHouse/pull/72076) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#72184](https://github.com/ClickHouse/ClickHouse/issues/72184): Add google-cloud-cpp submodule. [#72092](https://github.com/ClickHouse/ClickHouse/pull/72092) ([Pablo Marcos](https://github.com/pamarcos)).

View File

@ -70,8 +70,8 @@ enable sanitizers you must use a version that matches the exact same `std` as th
the crates):
```bash
rustup toolchain install nightly-2024-04-01
rustup default nightly-2024-04-01
rustup toolchain install nightly-2024-12-01
rustup default nightly-2024-12-01
rustup component add rust-src
```

View File

@ -6,6 +6,14 @@ sidebar_label: Iceberg
# Iceberg Table Engine
:::warning
We recommend using the [Iceberg Table Function](/docs/en/sql-reference/table-functions/iceberg.md) for working with Iceberg data in ClickHouse. The Iceberg Table Function currently provides sufficient functionality, offering a partial read-only interface for Iceberg tables.
The Iceberg Table Engine is available but may have limitations. ClickHouse wasn't originally designed to support tables with externally changing schemas, which can affect the functionality of the Iceberg Table Engine. As a result, some features that work with regular tables may be unavailable or may not function correctly, especially when using the old analyzer.
For optimal compatibility, we suggest using the Iceberg Table Function while we continue to improve support for the Iceberg Table Engine.
:::
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3, Azure, and HDFS, as well as with locally stored tables.
## Create Table
@ -63,6 +71,16 @@ CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table
Table engine `Iceberg` is an alias to `IcebergS3` now.
**Schema Evolution**
At the moment, ClickHouse can read Iceberg tables whose schema has changed over time. We currently support reading tables where columns have been added and removed, and their order has changed. You can also change a column where a value is required to one where NULL is allowed. Additionally, we support permitted type casting for simple types, namely:
* int -> long
* float -> double
* decimal(P, S) -> decimal(P', S) where P' > P.
Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
To read a table whose schema has changed after its creation using dynamic schema inference, set `allow_dynamic_metadata_for_data_lakes = true` when creating the table, as sketched below.
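A minimal sketch, assuming the setting is passed via the table-level `SETTINGS` clause; the endpoint, credentials, and path below are placeholders:

```sql
-- Engine table that picks up schema changes made after creation.
CREATE TABLE iceberg_table
ENGINE = IcebergS3('http://s3.example.com/bucket/warehouse/table/', 'ACCESS_KEY', 'SECRET_KEY')
SETTINGS allow_dynamic_metadata_for_data_lakes = true;
```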
### Data cache {#data-cache}
`Iceberg` table engine and table function support data caching same as `S3`, `AzureBlobStorage`, `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).

View File

@ -12,7 +12,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec
``` sql
CREATE TABLE s3_queue_engine_table (name String, value UInt32)
ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression])
ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression], [headers])
[SETTINGS]
[mode = '',]
[after_processing = 'keep',]

View File

@ -177,7 +177,7 @@ CREATE TABLE table_name
CounterID UInt32,
UserID UInt32,
ver UInt16
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

View File

@ -36,6 +36,8 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec
- Requires `max_rows_to_keep`
- `max_rows_to_keep` — Maximum rows to keep within the memory table, where the oldest rows are deleted on each insertion (i.e. a circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block.
- Default value: `0`
- `compress` - Whether to compress data in memory (see the sketch after this list).
- Default value: `false`
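A minimal sketch combining these settings; the table name and schema are hypothetical:

```sql
-- Circular buffer of 100 to 1000 rows, compressed in memory.
CREATE TABLE memory_ring_buffer
(
    ts DateTime,
    message String
)
ENGINE = Memory
SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000, compress = true;
```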
## Usage {#usage}

View File

@ -7,6 +7,7 @@ Contains information about setting changes in previous ClickHouse versions.
Columns:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) - The settings type: `Core` (general / query settings), `MergeTree`.
- `version` ([String](../../sql-reference/data-types/string.md)) — The ClickHouse version in which settings were changed
- `changes` ([Array](../../sql-reference/data-types/array.md) of [Tuple](../../sql-reference/data-types/tuple.md)) — A description of the setting changes: (setting name, previous value, new value, reason for the change)
@ -22,6 +23,7 @@ FORMAT Vertical
``` text
Row 1:
──────
type: Core
version: 23.5
changes: [('input_format_parquet_preserve_order','1','0','Allow Parquet reader to reorder rows for better parallelism.'),('parallelize_output_from_storages','0','1','Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows.'),('use_with_fill_by_sorting_prefix','0','1','Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently'),('output_format_parquet_compliant_nested_types','0','1','Change an internal field name in output Parquet file schema.')]
```

View File

@ -15,12 +15,19 @@ groupConcat[(delimiter [, limit])](expression);
**Arguments**
- `expression` — The expression or column name that outputs strings to be concatenated..
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string or delimiter from parameters if not specified.
- `expression` — The expression or column name that outputs strings to be concatenated.
**Parameters**
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
:::note
If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit.
Also, if different delimiters are specified as a parameter and as an argument, only the delimiter from the arguments is used.
:::
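For example, a sketch against a hypothetical `employees(name String)` table, with the delimiter and limit passed as parameters:

```sql
-- Concatenates at most two names, separated by ', '.
SELECT groupConcat(', ', 2)(name) FROM employees;
```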
**Returned value**

View File

@ -73,6 +73,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json
Using CAST from `Tuple`:
```sql
SET enable_named_columns_in_function_tuple = 1;
SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json;
```
@ -97,8 +98,9 @@ SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json;
Using CAST from deprecated `Object('json')`:
```sql
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
SET allow_experimental_object_type = 1;
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::Object('json')::JSON AS json;
```
```text
┌─json───────────────────────────────────────────┐

View File

@ -786,6 +786,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
## indexOfAssumeSorted(arr, x)
Returns the index (starting from 1) of the first element equal to `x` if it is in the array, or 0 if it is not.
The function is intended for arrays sorted in ascending (non-descending) order, since binary search is used for the search.
If the internal array type is Nullable, the `indexOf` function is used instead.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                             5 │
└───────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...)
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array.
@ -2157,6 +2175,41 @@ Result:
└───────────────────────────────────────────────┘
```
## arrayPrAUC
Calculates the AUC (Area Under the Curve) for the Precision-Recall curve.
**Syntax**
``` sql
arrayPrAUC(arr_scores, arr_labels)
```
**Arguments**
- `arr_scores` — Scores the prediction model gives.
- `arr_labels` — Labels of the samples, usually 1 for a positive sample and 0 for a negative sample.
**Returned value**
Returns the PR-AUC value with type Float64.
**Example**
Query:
``` sql
SELECT arrayPrAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]);
```
Result:
``` text
┌─arrayPrAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐
│ 0.8333333333333333 │
└─────────────────────────────────────────────────┘
```
## arrayMap(func, arr1, ...)
Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements.

View File

@ -195,7 +195,7 @@ makeDateTime64(year, month, day, hour, minute, second[, precision])
**Returned value**
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**

View File

@ -733,6 +733,18 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday
SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday'
```
Referring to nested values by passing multiple `indices_or_keys` parameters:
```
SELECT JSONExtract('{"a":{"b":"hello","c":{"d":[1,2,3],"e":[1,3,7]}}}','a','c','Map(String, Array(UInt8))') AS val, toTypeName(val), val['d'];
```
Result:
```
┌─val───────────────────────┬─toTypeName(val)───────────┬─arrayElement(val, 'd')─┐
│ {'d':[1,2,3],'e':[1,3,7]} │ Map(String, Array(UInt8)) │ [1,2,3] │
└───────────────────────────┴───────────────────────────┴────────────────────────┘
```
### JSONExtractKeysAndValues
Parses key-value pairs from JSON where the values are of the given ClickHouse data type.

View File

@ -253,7 +253,11 @@ SELECT format('{} {}', 'Hello', 'World')
## translate
Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified.
Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings.
`from` and `to` must be constant ASCII strings.
If `from` and `to` have equal sizes, each occurrence of the 1st character of `from` in `s` is replaced by the 1st character of `to`, the 2nd character of `from` in `s` is replaced by the 2nd character of `to`, etc.
If `from` contains more characters than `to`, all occurrences of the characters at the end of `from` that have no corresponding character in `to` are deleted from `s`.
Non-ASCII characters in `s` are not modified by the function.
**Syntax**
@ -275,6 +279,20 @@ Result:
└───────────────┘
```
Example where the `from` and `to` arguments have different lengths:
``` sql
SELECT translate('clickhouse', 'clickhouse', 'CLICK') AS res
```
Result:
``` text
┌─res───┐
│ CLICK │
└───────┘
```
## translateUTF8
Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings.

View File

@ -36,8 +36,8 @@ Alias:
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -203,7 +203,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -238,7 +238,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -272,7 +272,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -308,7 +308,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee
**Parameters**
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 encoded string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -346,7 +346,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -380,7 +380,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -414,7 +414,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -450,7 +450,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -487,7 +487,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
```
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -520,7 +520,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -553,7 +553,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -588,7 +588,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -625,7 +625,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -659,7 +659,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -693,7 +693,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md).
**Returned value**
@ -729,7 +729,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md).
- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md)
**Returned value**
@ -1414,8 +1414,8 @@ countSubstrings(haystack, needle[, start_pos])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1461,8 +1461,8 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos])
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1513,8 +1513,8 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
**Arguments**
- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — UTF-8 string in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `needle` — Substring to be searched. [String](../data-types/string.md).
- `start_pos` Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.
**Returned value**
@ -1565,7 +1565,7 @@ countMatches(haystack, pattern)
**Arguments**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — The string to search in. [String](../data-types/string.md).
- `pattern` — The regular expression with [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md).
**Returned value**
@ -1610,7 +1610,7 @@ countMatchesCaseInsensitive(haystack, pattern)
**Arguments**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — The string to search in. [String](../data-types/string.md).
- `pattern` — The regular expression with [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md).
**Returned value**
@ -1647,8 +1647,8 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
**Arguments**
- `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String, in which regexp pattern will to be matched. [String](../data-types/string.md).
- `pattern` — String, regexp expression, must be constant. [String](../data-types/string.md).
- `index` An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional.
**Returned value**
@ -1687,8 +1687,8 @@ hasSubsequence(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Subsequence to be searched. [String](../data-types/string.md).
**Returned value**
@ -1722,8 +1722,8 @@ hasSubsequenceCaseInsensitive(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. [String](../data-types/string.md).
- `needle` — Subsequence to be searched. [String](../data-types/string.md).
**Returned value**
@ -1757,8 +1757,8 @@ hasSubsequenceUTF8(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../data-types/string.md).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../data-types/string.md).
**Returned value**
@ -1792,8 +1792,8 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
**Arguments**
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack` — String in which the search is performed. UTF-8 encoded [String](../data-types/string.md).
- `needle` — Subsequence to be searched. UTF-8 encoded [String](../data-types/string.md).
**Returned value**
@ -1827,7 +1827,7 @@ hasToken(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
@ -1862,12 +1862,12 @@ hasTokenOrNull(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill formed.
- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill formed.
**Implementation details**
@ -1899,7 +1899,7 @@ hasTokenCaseInsensitive(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**
@ -1934,7 +1934,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token)
**Parameters**
- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `haystack`: String in which the search is performed. [String](../data-types/string.md) or [Enum](../data-types/enum.md).
- `token`: Maximal length substring between two non alphanumeric ASCII characters (or boundaries of haystack).
**Returned value**

View File

@ -122,7 +122,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int8](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
This is not considered an error.
For example: `SELECT toInt8(128) == -128;`.
:::
@ -370,7 +370,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
This is not considered an error.
For example: `SELECT toInt16(32768) == -32768;`.
:::
@ -618,7 +618,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [Int32](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
This is not considered an error.
For example: `SELECT toInt32(2147483648) == -2147483648;`
:::
@ -865,7 +865,7 @@ Unsupported types:
:::note
If the input value cannot be represented within the bounds of [Int64](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
This is not considered an error.
For example: `SELECT toInt64(9223372036854775808) == -9223372036854775808;`
:::
@ -1608,7 +1608,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt8](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
This is not considered an error.
For example: `SELECT toUInt8(256) == 0;`.
:::
@ -1856,7 +1856,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt16](../data-types/int-uint.md), overflow or underflow of the result occurs.
This is not considered an error.
This is not considered an error.
For example: `SELECT toUInt16(65536) == 0;`.
:::
@ -2104,7 +2104,7 @@ Unsupported arguments:
:::note
If the input value cannot be represented within the bounds of [UInt32](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
This is not considered an error.
For example: `SELECT toUInt32(4294967296) == 0;`
:::
@ -2353,7 +2353,7 @@ Unsupported types:
:::note
If the input value cannot be represented within the bounds of [UInt64](../data-types/int-uint.md), the result over or under flows.
This is not considered an error.
This is not considered an error.
For example: `SELECT toUInt64(18446744073709551616) == 0;`
:::
@ -3691,8 +3691,8 @@ toDateTime(expr[, time_zone ])
- `time_zone` — Time zone. [String](../data-types/string.md).
:::note
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed.
:::
@ -5536,7 +5536,7 @@ Result:
## reinterpretAsUInt256
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
@ -5612,7 +5612,7 @@ Result:
## reinterpretAsInt16
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
@ -7228,6 +7228,45 @@ Result:
└──────────────────────────────┘
```
## toUnixTimestamp64Second
Converts a `DateTime64` to an `Int64` value with fixed second precision. The input value is scaled up or down appropriately depending on its precision.
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
:::
**Syntax**
```sql
toUnixTimestamp64Second(value)
```
**Arguments**
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
**Example**
Query:
```sql
WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64
SELECT toUnixTimestamp64Second(dt64);
```
Result:
```response
┌─toUnixTimestamp64Second(dt64)─┐
│ 1234567891 │
└───────────────────────────────┘
```
## toUnixTimestamp64Micro
Converts a `DateTime64` to an `Int64` value with fixed microsecond precision. The input value is scaled up or down appropriately depending on its precision.

View File

@ -13,5 +13,10 @@ Syntax:
``` sql
ALTER ROLE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
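A sketch based on the grammar above; the role and setting names are hypothetical:

```sql
ALTER ROLE analyst ADD SETTINGS max_memory_usage = 10000000000 READONLY;
ALTER ROLE analyst ADD PROFILES 'default';
ALTER ROLE analyst DROP SETTINGS max_memory_usage;
```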

View File

@ -13,6 +13,11 @@ Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
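For example (the profile names `restricted` and `legacy` are illustrative only; `MODIFY SETTINGS` updates a setting in place while other elements are kept):

```sql
ALTER SETTINGS PROFILE restricted
    MODIFY SETTINGS max_threads = 8 MIN 1 MAX 16
    DROP PROFILES 'legacy'
```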

View File

@ -18,7 +18,12 @@ ALTER USER [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [=value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE|CONST|CHANGEABLE_IN_READONLY] [,...] ]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
To use `ALTER USER` you must have the [ALTER USER](../../../sql-reference/statements/grant.md#access-management) privilege.
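A short sketch (the user name `jane` and the value are illustrative): pin a setting as a constant and drop all inherited profiles in one statement.

```sql
ALTER USER jane
    ADD SETTINGS max_memory_usage = 10000000000 CONST
    DROP ALL PROFILES
```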

View File

@ -154,16 +154,17 @@ This feature is deprecated and will be removed in the future.
For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md)
## Refreshable Materialized View [Experimental] {#refreshable-materialized-view}
## Refreshable Materialized View {#refreshable-materialized-view}
```sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
REFRESH EVERY|AFTER interval [OFFSET interval]
RANDOMIZE FOR interval
DEPENDS ON [db.]name [, [db.]name [, ...]]
SETTINGS name = value [, name = value [, ...]]
[RANDOMIZE FOR interval]
[DEPENDS ON [db.]name [, [db.]name [, ...]]]
[SETTINGS name = value [, name = value [, ...]]]
[APPEND]
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
[TO[db.]name] [(columns)] [ENGINE = engine]
[EMPTY]
AS SELECT ...
[COMMENT 'comment']
```
@ -281,7 +282,7 @@ This replaces *all* refresh parameters at once: schedule, dependencies, settings
The status of all refreshable materialized views is available in table [`system.view_refreshes`](../../../operations/system-tables/view_refreshes.md). In particular, it contains the refresh progress (if running), the last and next refresh times, and the exception message if a refresh failed.
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views).
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|WAIT|CANCEL VIEW`](../system.md#refreshable-materialized-views).
To wait for a refresh to complete, use [`SYSTEM WAIT VIEW`](../system.md#refreshable-materialized-views). This is particularly useful for waiting for the initial refresh after creating a view.
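A brief sketch of the workflow (all names here are illustrative, not taken from the documentation above):

```sql
-- Refresh hourly and append new rows to an existing target table.
CREATE MATERIALIZED VIEW hourly_stats
REFRESH EVERY 1 HOUR
APPEND
TO stats_history
AS SELECT toStartOfHour(now()) AS hour, count() AS c
FROM events;

-- Block until the initial refresh completes.
SYSTEM WAIT VIEW hourly_stats;
```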

View File

@ -605,6 +605,10 @@ Allows using a specified table engine when creating a table. Applies to [table e
Grants all the privileges on a regulated entity to a user account or a role.
:::note
The privilege `ALL` is not supported in ClickHouse Cloud, where the `default` user has limited permissions. Users can grant the maximum permissions to a user by granting the `default_role`. See [here](/docs/en/cloud/security/cloud-access-management#initial-settings) for further details.
:::
### NONE
Doesn't grant any privileges.

View File

@ -173,18 +173,7 @@ Result:
└───┴────┴─────┘
```
## [experimental] Join with inequality conditions for columns from different tables
:::note
This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command:
```sql
SET allow_experimental_join_condition=1
```
Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`.
:::
## Join with inequality conditions for columns from different tables
ClickHouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. Inequality conditions are supported only for the `hash` and `grace_hash` join algorithms, and they are not supported with `join_use_nulls`.
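For example, assuming two hypothetical tables `t1(key, a)` and `t2(key, b)`, an inequality condition can be combined with the equality key:

```sql
SELECT t1.key, t1.a, t2.b
FROM t1
INNER JOIN t2 ON t1.key = t2.key AND t1.a < t2.b;
```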

View File

@ -65,6 +65,14 @@ SELECT * FROM icebergS3(iceberg_conf, filename = 'test_table')
DESCRIBE icebergS3(iceberg_conf, filename = 'test_table')
```
**Schema Evolution**
At the moment, ClickHouse can read Iceberg tables whose schema has changed over time. We currently support reading tables where columns have been added or removed and their order has changed. You can also change a column where a value is required to one where NULL is allowed. Additionally, we support permitted type casting for simple types, namely:
* int -> long
* float -> double
* decimal(P, S) -> decimal(P', S) where P' > P.
Currently, it is not possible to change nested structures or the types of elements within arrays and maps.
**Aliases**
The table function `iceberg` is now an alias for `icebergS3`.
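So, reusing the names from the example above, the following is equivalent to the `icebergS3` call:

```sql
SELECT * FROM iceberg(iceberg_conf, filename = 'test_table')
```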

View File

@ -50,6 +50,10 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',
A table object with the same columns as the original MySQL table.
:::note
Some data types of MySQL can be mapped to different ClickHouse types - this is addressed by query-level setting [mysql_datatypes_support_level](/docs/en/operations/settings/settings.md#mysql_datatypes_support_level)
:::
:::note
In an `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a column name list, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
:::
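A minimal sketch of the `FUNCTION` keyword in use (the connection details and the `(id, name)` column list are illustrative assumptions):

```sql
INSERT INTO FUNCTION mysql('mysql1:3306', 'mysql_database', 'test', 'user', 'password')
    (id, name) VALUES (1, 'one');
```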
@ -141,3 +145,7 @@ WHERE id > (SELECT max(id) from mysql_copy);
- [The MySQL table engine](../../engines/table-engines/integrations/mysql.md)
- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql)
- [mysql_datatypes_support_level](/docs/en/operations/settings/settings.md#mysql_datatypes_support_level)
- [mysql_map_fixed_string_to_text_in_show_columns](/docs/en/operations/settings/settings.md#mysql_map_fixed_string_to_text_in_show_columns)
- [mysql_map_string_to_text_in_show_columns](/docs/en/operations/settings/settings.md#mysql_map_string_to_text_in_show_columns)
- [mysql_max_rows_to_insert](/docs/en/operations/settings/settings.md#mysql_max_rows_to_insert)

View File

@ -16,7 +16,7 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
**Syntax**
``` sql
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method])
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method] [,headers])
s3(named_collection[, option=value [,..]])
```
@ -44,6 +44,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id`
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
- `headers` - Parameter is optional. Allows headers to be passed in the S3 request. Pass in the format `headers(key=value)`, e.g. `headers('x-amz-request-payer' = 'requester')`.
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
@ -313,6 +314,25 @@ SET use_hive_partitioning = 1;
SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
```
## Accessing requester-pays buckets
To access a requester-pays bucket, the header `x-amz-request-payer = requester` must be passed in any request. This is achieved by passing the parameter `headers('x-amz-request-payer' = 'requester')` to the s3 function. For example:
```sql
SELECT
count() AS num_rows,
uniqExact(_file) AS num_files
FROM s3('https://coiled-datasets-rp.s3.us-east-1.amazonaws.com/1trc/measurements-100*.parquet', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', headers('x-amz-request-payer' = 'requester'))
```

Result:

```response
┌───num_rows─┬─num_files─┐
│ 1110000000 │       111 │
└────────────┴───────────┘

1 row in set. Elapsed: 3.089 sec. Processed 1.09 billion rows, 0.00 B (353.55 million rows/s., 0.00 B/s.)
Peak memory usage: 192.27 KiB.
```
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.

View File

@ -67,8 +67,8 @@ As with C++ dependencies, ClickHouse uses vendoring to
If you plan to enable sanitizers, you must use a version matching the same `std` that is used in CI (we vendor the crates):
```bash
rustup toolchain install nightly-2024-04-01
rustup default nightly-2024-04-01
rustup toolchain install nightly-2024-12-01
rustup default nightly-2024-12-01
rustup component add rust-src
```

View File

@ -175,7 +175,7 @@ CREATE TABLE table_name
CounterID UInt32,
UserID UInt32,
ver UInt16
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);

View File

@ -785,6 +785,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
# indexOfAssumeSorted(arr, x)
Returns the index of the first 'x' element (numbered from 1) if it is present in the array, or 0 otherwise.
The function should be used with arrays sorted in non-descending order, since binary search is used.
If the inner array type is Nullable, the indexOf function is used instead.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                             5 │
└───────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...)
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns a non-zero value. If `func` is not specified, returns the number of non-zero elements in the array.

View File

@ -306,6 +306,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
└───────────────────────────────────┘
```
## indexOfAssumeSorted(arr, x)
Returns the index of the first element x (numbered from 1) if it is present in the array, or 0 if it is not.
The function should be used if the array is sorted in non-decreasing order, since binary search is used.
If the inner type is Nullable, the indexOf function is used instead.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                             5 │
└───────────────────────────────────────────────┘
```
Elements equal to `NULL` are handled as normal values.
## arrayCount(\[func,\] arr1, ...) {#array-count}

View File

@ -13,5 +13,10 @@ sidebar_label: ROLE
``` sql
ALTER ROLE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```

View File

@ -13,6 +13,11 @@ sidebar_label: SETTINGS PROFILE
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[ON CLUSTER cluster_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
[TO {{role1 | user1 [, role2 | user2 ...]} | NONE | ALL | ALL EXCEPT {role1 | user1 [, role2 | user2 ...]}}]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```

View File

@ -19,7 +19,12 @@ ALTER USER [IF EXISTS] name1 [RENAME TO new_name |, name2 [,...]]
[VALID UNTIL datetime]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
[ADD|MODIFY SETTINGS variable [=value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE|CONST|CHANGEABLE_IN_READONLY] [,...] ]
[DROP SETTINGS variable [,...] ]
[ADD PROFILES 'profile_name' [,...] ]
[DROP PROFILES 'profile_name' [,...] ]
[DROP ALL PROFILES]
[DROP ALL SETTINGS]
```
To use `ALTER USER` you must have the [ALTER USER](../grant.md#grant-access-management) privilege.

View File

@ -337,6 +337,24 @@ SELECT indexOf([1, 3, NULL, NULL], NULL)
Elements set to `NULL` are handled as normal values.
## indexOfAssumeSorted(arr, x)
Returns the index of the first element x (numbered from 1) if it is present in the array, or 0 if it is not.
The function should be used for arrays sorted in non-descending order, since binary search is used.
If the inner array type is Nullable, the indexOf function is used instead.
Example:
``` sql
SELECT indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)
```
``` text
┌─indexOfAssumeSorted([1, 3, 3, 3, 4, 4, 5], 4)─┐
│                                             5 │
└───────────────────────────────────────────────┘
```
## arrayCount(\[func,\] arr1, ...) {#array-count}
Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns a non-zero value. If `func` is not specified, it returns the number of non-zero elements in the array.

View File

@ -480,6 +480,17 @@ void Client::connect()
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
config().setString("host", connection_parameters.host);
config().setInt("port", connection_parameters.port);
/// Apply setting changes received from server, but with lower priority than settings
/// changed from command line.
SettingsChanges settings_from_server = assert_cast<Connection &>(*connection).settingsFromServer();
const Settings & settings = global_context->getSettingsRef();
std::erase_if(settings_from_server, [&](const SettingChange & change)
{
return settings.isChanged(change.name);
});
global_context->applySettingsChanges(settings_from_server);
break;
}
catch (const Exception & e)

View File

@ -35,7 +35,7 @@ public:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path);
}
disk.getDisk()->removeRecursive(path);
disk.getDisk()->removeRecursiveWithLimit(path);
}
else if (disk.getDisk()->existsFile(path))
{

View File

@ -277,10 +277,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
WriteBufferFromOwnString str_buf;
bool oneline_current_query = oneline || approx_query_length < max_line_length;
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
IAST::FormatSettings settings(oneline_current_query, hilite);
settings.show_secrets = true;
settings.print_pretty_type_names = !oneline_current_query;
res->format(settings);
res->format(str_buf, settings);
if (insert_query_payload)
{
@ -324,10 +324,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
WriteBufferFromOwnString str_buf;
bool oneline_current_query = oneline || approx_query_length < max_line_length;
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
IAST::FormatSettings settings(oneline_current_query, hilite);
settings.show_secrets = true;
settings.print_pretty_type_names = !oneline_current_query;
res->format(settings);
res->format(str_buf, settings);
auto res_string = str_buf.str();
WriteBufferFromOStream res_cout(std::cout, 4096);

View File

@ -3,6 +3,7 @@
#include <vector>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <unordered_set>
#include <unordered_map>
#include <list>

View File

@ -1,5 +1,6 @@
#include "Commands.h"
#include <Common/StringUtils.h>
#include <queue>
#include "KeeperClient.h"
#include "Parsers/CommonParsers.h"

View File

@ -19,8 +19,9 @@
#include <Common/getHashOfLoadedBinary.h>
#include <Common/IO.h>
#include <base/phdr_cache.h>
#include <base/coverage.h>
#include <base/phdr_cache.h>
#include <base/scope_guard.h>
int mainEntryClickHouseKeeper(int argc, char ** argv);

View File

@ -1,4 +1,5 @@
#include <base/phdr_cache.h>
#include <base/scope_guard.h>
#include <Common/EnvironmentChecks.h>
#include <Common/StringUtils.h>
#include <Common/getHashOfLoadedBinary.h>

View File

@ -15,6 +15,7 @@
#include <Common/logger_useful.h>
#include <base/phdr_cache.h>
#include <Common/ErrorHandlers.h>
#include <Processors/QueryPlan/QueryPlanStepRegistry.h>
#include <base/getMemoryAmount.h>
#include <base/getAvailableMemoryAmount.h>
#include <base/errnoToString.h>
@ -59,6 +60,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/SharedThreadPools.h>
#include <IO/UseSSL.h>
#include <Interpreters/CancellationChecker.h>
#include <Interpreters/ServerAsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
#include <Interpreters/DNSCacheUpdater.h>
@ -295,6 +297,7 @@ namespace CurrentMetrics
extern const Metric MergesMutationsMemoryTracking;
extern const Metric MaxDDLEntryID;
extern const Metric MaxPushedDDLEntryID;
extern const Metric StartupScriptsExecutionState;
}
namespace ProfileEvents
@ -365,6 +368,14 @@ namespace ErrorCodes
}
enum StartupScriptsExecutionState : CurrentMetrics::Value
{
NotFinished = 0,
Success = 1,
Failure = 2,
};
static std::string getCanonicalPath(std::string && path)
{
Poco::trimInPlace(path);
@ -781,9 +792,12 @@ void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, Contex
startup_context->makeQueryContext();
executeQuery(read_buffer, write_buffer, true, startup_context, callback, QueryFlags{ .internal = true }, std::nullopt, {});
}
CurrentMetrics::set(CurrentMetrics::StartupScriptsExecutionState, StartupScriptsExecutionState::Success);
}
catch (...)
{
CurrentMetrics::set(CurrentMetrics::StartupScriptsExecutionState, StartupScriptsExecutionState::Failure);
tryLogCurrentException(log, "Failed to parse startup scripts file");
}
}
@ -924,6 +938,8 @@ try
registerRemoteFileMetadatas();
registerSchedulerNodes();
QueryPlanStepRegistry::registerPlanSteps();
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
@ -1377,6 +1393,23 @@ try
setOOMScore(oom_score, log);
#endif
std::unique_ptr<DB::BackgroundSchedulePoolTaskHolder> cancellation_task;
SCOPE_EXIT({
if (cancellation_task)
CancellationChecker::getInstance().terminateThread();
});
if (server_settings[ServerSetting::background_schedule_pool_size] > 1)
{
auto cancellation_task_holder = global_context->getSchedulePool().createTask(
"CancellationChecker",
[] { CancellationChecker::getInstance().workerFunction(); }
);
cancellation_task = std::make_unique<DB::BackgroundSchedulePoolTaskHolder>(std::move(cancellation_task_holder));
(*cancellation_task)->activateAndSchedule();
}
global_context->setRemoteHostFilter(config());
global_context->setHTTPHeaderFilter(config());

View File

@ -13,6 +13,7 @@
#include <boost/program_options.hpp>
#include <filesystem>
#include <iostream>
namespace fs = std::filesystem;

View File

@ -1,6 +1,8 @@
#include <Common/Exception.h>
#include <IO/ReadHelpers.h>
#include <fmt/format.h>
#include <iostream>
#include <vector>
#include <sys/types.h>

View File

@ -1,7 +1,7 @@
As we have multiple projects, we use a workspace to manage them (it's far simpler and leads to fewer issues). In order
to vendor all the dependencies we need to store both the registry and the packages themselves.
Note that this includes the exact `std` dependencies for the rustc version used in CI (currently nightly-2024-04-01),
Note that this includes the exact `std` dependencies for the rustc version used in CI (currently nightly-2024-12-01),
so you need to install `rustup component add rust-src` for the specific version.
* First step: (Re)-generate the Cargo.lock file (run under `workspace/`).
@ -16,7 +16,7 @@ Note that we use both commands to vendor both registry and crates. No idea why b
* First we need to install the tool if you don't already have it:
```bash
cargo install --version 0.2.6 cargo-local-registry
cargo install --version 0.2.7 cargo-local-registry
```
* Now add the local packages:
@ -28,9 +28,9 @@ export RUSTC_ROOT=$(rustc --print=sysroot)
cd "$CH_TOP_DIR"/rust/workspace
cargo local-registry --git --sync Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/library/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/
cargo local-registry --no-delete --git --sync "$RUSTC_ROOT"/lib/rustlib/src/rust/library/std/Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/
cp "$RUSTC_ROOT"/lib/rustlib/src/rust/library/Cargo.lock "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/
cargo local-registry --no-delete --git --sync "$RUSTC_ROOT"/lib/rustlib/src/rust/library/test/Cargo.lock "$CH_TOP_DIR"/contrib/rust_vendor
cargo vendor --no-delete --locked "$CH_TOP_DIR"/contrib/rust_vendor

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ version = "0.1.0"
[dependencies]
anstream = {version = "0.6.12"}
prqlc = {version = "0.11.3", default-features = false}
prqlc = {version = "0.13.2", default-features = false}
serde_json = "1.0"
[lib]

View File

@ -36,6 +36,7 @@ pub unsafe extern "C" fn prql_to_sql_impl(
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
display: prqlc::DisplayOptions::Plain,
};
if let Ok(sql_str) = prqlc::compile(&query_str, &opts) {

View File

@ -6,9 +6,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
skim = { version = "0.10.2", default-features = false }
skim = { version = "0.15.5", default-features = false, features = ["cli"] }
cxx = "1.0.83"
term = "0.7.0"
term = "1.0.0"
clap = "4.5.22"
[build-dependencies]
cxx-build = "1.0.83"

View File

@ -44,15 +44,15 @@ fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String,
}
let options = SkimOptionsBuilder::default()
.height(Some("30%"))
.query(Some(prefix.to_str().unwrap()))
.height("30%".to_string())
.query(Some(prefix.to_str().unwrap().to_string()))
.tac(true)
// Do not clear on start and clear on exit will clear skim output from the terminal.
//
// Refs: https://github.com/lotabout/skim/issues/494#issuecomment-1776565846
.no_clear_start(true)
.no_clear(false)
.tiebreak(Some("-score".to_string()))
.tiebreak(vec![RankCriteria::NegScore])
// Exact mode performs better for SQL.
//
// Default fuzzy search is too smart for SQL, it even takes into account the case, which

View File

@ -4,20 +4,17 @@
#include <Interpreters/Access/getValidUntilFromAST.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/Access/ASTPublicSSHKey.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <IO/parseDateTimeBestEffort.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Common/OpenSSLHelpers.h>
#include <Poco/SHA1Engine.h>
#include <base/types.h>
#include <base/hex.h>
#include <boost/algorithm/hex.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h"

View File

@ -45,7 +45,7 @@ namespace
roles_info.names_of_roles[role_id] = role->getName();
roles_info.access.makeUnion(role->access);
roles_info.settings_from_enabled_roles.merge(role->settings);
roles_info.settings_from_enabled_roles.merge(role->settings, /* normalize= */ false);
for (const auto & granted_role : role->granted_roles.getGranted())
collectRoles(roles_info, skip_ids, get_role_function, granted_role, false, false);

View File

@ -137,6 +137,13 @@ void SettingsConstraints::merge(const SettingsConstraints & other)
}
void SettingsConstraints::check(const Settings & current_settings, const AlterSettingsProfileElements & profile_elements, SettingSource source) const
{
check(current_settings, profile_elements.add_settings, source);
check(current_settings, profile_elements.modify_settings, source);
/// We don't check `drop_settings` here.
}
void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const
{
for (const auto & element : profile_elements)

View File

@ -74,10 +74,11 @@ public:
void merge(const SettingsConstraints & other);
/// Checks whether `change` violates these constraints and throws an exception if so.
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const SettingChange & change, SettingSource source) const;
void check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const;
void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const;
void check(const Settings & current_settings, const AlterSettingsProfileElements & profile_elements, SettingSource source) const;
/// Checks whether `change` violates these constraints and throws an exception if so. (setting short name is expected inside `changes`)
void check(const MergeTreeSettings & current_settings, const SettingChange & change) const;

View File

@ -9,6 +9,8 @@
#include <IO/WriteHelpers.h>
#include <Parsers/Access/ASTSettingsProfileElement.h>
#include <base/removeDuplicates.h>
#include <boost/container/flat_map.hpp>
#include <boost/container/flat_set.hpp>
namespace DB
@ -19,6 +21,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
init(ast, nullptr);
@ -116,16 +119,20 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toASTWithName
}
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast)
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, bool normalize_)
{
for (const auto & ast_element : ast.elements)
emplace_back(*ast_element);
if (normalize_)
normalize();
}
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control)
SettingsProfileElements::SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control, bool normalize_)
{
for (const auto & ast_element : ast.elements)
emplace_back(*ast_element, access_control);
if (normalize_)
normalize();
}
@ -133,7 +140,11 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toAST() con
{
auto res = std::make_shared<ASTSettingsProfileElements>();
for (const auto & element : *this)
res->elements.push_back(element.toAST());
{
auto element_ast = element.toAST();
if (!element_ast->empty())
res->elements.push_back(element_ast);
}
return res;
}
@ -141,7 +152,11 @@ std::shared_ptr<ASTSettingsProfileElements> SettingsProfileElements::toASTWithNa
{
auto res = std::make_shared<ASTSettingsProfileElements>();
for (const auto & element : *this)
res->elements.push_back(element.toASTWithNames(access_control));
{
auto element_ast = element.toASTWithNames(access_control);
if (!element_ast->empty())
res->elements.push_back(element_ast);
}
return res;
}
@ -220,9 +235,11 @@ void SettingsProfileElements::removeSettingsKeepProfiles()
}
void SettingsProfileElements::merge(const SettingsProfileElements & other)
void SettingsProfileElements::merge(const SettingsProfileElements & other, bool normalize_)
{
insert(end(), other.begin(), other.end());
if (normalize_)
normalize();
}
@ -280,6 +297,81 @@ std::vector<UUID> SettingsProfileElements::toProfileIDs() const
return res;
}
void SettingsProfileElements::normalize()
{
/// Ensure that each element represents either a setting or a profile.
{
SettingsProfileElements new_elements;
for (auto & element : *this)
{
if (element.parent_profile && !element.setting_name.empty())
{
SettingsProfileElement new_element;
new_element.parent_profile = element.parent_profile;
element.parent_profile.reset();
new_elements.push_back(std::move(new_element));
}
}
insert(end(), new_elements.begin(), new_elements.end());
}
/// Partitioning: first profiles, then settings.
/// We use std::stable_partition() here because we want to preserve the relative order of profiles and the relative order of settings.
/// (We need that order to be preserved to remove duplicates correctly - see below.)
auto profiles_begin = begin();
auto profiles_end = std::stable_partition(begin(), end(), [](const SettingsProfileElement & element) { return static_cast<bool>(element.parent_profile); });
auto settings_begin = profiles_end;
auto settings_end = end();
/// Remove duplicates among profiles.
/// We keep the last position of any used profile.
/// It's important to keep exactly the last position (and not just any position) because profiles can override settings from each other.
/// For example, [pr_A, pr_B, pr_A, pr_C] is always the same as [pr_B, pr_A, pr_C], but can be not the same as [pr_A, pr_B, pr_C]
/// if pr_A and pr_B give different values to same settings.
{
boost::container::flat_set<UUID> profile_ids;
profile_ids.reserve(profiles_end - profiles_begin);
auto it = profiles_end;
while (it != profiles_begin)
{
--it;
auto & element = *it;
if (element.parent_profile && !profile_ids.emplace(*element.parent_profile).second)
element.parent_profile.reset();
}
}
/// Remove duplicates among settings.
/// We keep the first position of any used setting, and merge settings with the same name to that first element.
{
boost::container::flat_map<std::string_view, SettingsProfileElements::iterator> setting_name_to_first_encounter;
setting_name_to_first_encounter.reserve(settings_end - settings_begin);
for (auto it = settings_begin; it != settings_end; ++it)
{
auto & element = *it;
auto first = setting_name_to_first_encounter.emplace(element.setting_name, it).first->second;
if (it != first)
{
auto & first_element = *first;
if (element.value)
first_element.value = element.value;
if (element.min_value)
first_element.min_value = element.min_value;
if (element.max_value)
first_element.max_value = element.max_value;
if (element.writability)
first_element.writability = element.writability;
element.setting_name.clear();
}
}
}
/// Remove empty elements.
std::erase_if(*this, [](const SettingsProfileElement & element) { return element.empty(); });
}
bool SettingsProfileElements::isBackupAllowed() const
{
for (const auto & setting : *this)
@ -296,4 +388,139 @@ bool SettingsProfileElements::isAllowBackupSetting(const String & setting_name)
return Settings::resolveName(setting_name) == ALLOW_BACKUP_SETTING_NAME;
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const SettingsProfileElements & ast)
{
drop_all_settings = true;
drop_all_profiles = true;
add_settings = ast;
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTSettingsProfileElements & ast)
: AlterSettingsProfileElements(SettingsProfileElements{ast})
{
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control)
: AlterSettingsProfileElements(SettingsProfileElements{ast, access_control})
{
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast)
{
drop_all_settings = ast.drop_all_settings;
drop_all_profiles = ast.drop_all_profiles;
if (ast.add_settings)
add_settings = SettingsProfileElements{*ast.add_settings, /* normalize= */ false}; /// For "ALTER" the normalization is unnecessary.
if (ast.modify_settings)
modify_settings = SettingsProfileElements{*ast.modify_settings, /* normalize= */ false};
if (ast.drop_settings)
drop_settings = SettingsProfileElements{*ast.drop_settings, /* normalize= */ false};
}
AlterSettingsProfileElements::AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast, const AccessControl & access_control)
{
drop_all_settings = ast.drop_all_settings;
drop_all_profiles = ast.drop_all_profiles;
if (ast.add_settings)
add_settings = SettingsProfileElements{*ast.add_settings, access_control, /* normalize= */ false}; /// For "ALTER" the normalization is unnecessary.
if (ast.modify_settings)
modify_settings = SettingsProfileElements{*ast.modify_settings, access_control, /* normalize= */ false};
if (ast.drop_settings)
drop_settings = SettingsProfileElements{*ast.drop_settings, access_control, /* normalize= */ false};
}
void SettingsProfileElements::applyChanges(const AlterSettingsProfileElements & changes)
{
/// Apply "DROP" changes.
if (changes.drop_all_profiles)
{
for (auto & element : *this)
element.parent_profile.reset(); /// We only make this element empty, the element will be removed in normalizeProfileElements().
}
if (changes.drop_all_settings)
{
for (auto & element : *this)
element.setting_name.clear(); /// We only make this element empty, the element will be removed in normalizeProfileElements().
}
auto apply_drop_setting = [&](const String & setting_name)
{
for (auto & element : *this)
{
if (element.setting_name == setting_name)
element.setting_name.clear();
}
};
auto apply_drop_profile = [&](const UUID & profile_id)
{
for (auto & element : *this)
{
if (element.parent_profile == profile_id)
element.parent_profile.reset();
}
};
for (const auto & drop : changes.drop_settings)
{
if (drop.parent_profile)
apply_drop_profile(*drop.parent_profile);
if (!drop.setting_name.empty())
apply_drop_setting(drop.setting_name);
}
auto apply_modify_setting = [&](const SettingsProfileElement & modify)
{
SettingsProfileElement new_element;
new_element.setting_name = modify.setting_name;
new_element.value = modify.value;
new_element.min_value = modify.min_value;
new_element.max_value = modify.max_value;
new_element.writability = modify.writability;
push_back(new_element); /// normalizeProfileElements() will merge this new element with the previous elements.
};
/// Apply "ADD" changes.
auto apply_add_setting = [&](const SettingsProfileElement & add)
{
/// "ADD SETTING" must replace the value and the constraints of a setting, so first we need drop the previous elements for that setting.
apply_drop_setting(add.setting_name);
apply_modify_setting(add);
};
auto apply_add_profile = [&](const UUID & profile_id)
{
SettingsProfileElement new_element;
new_element.parent_profile = profile_id;
push_back(new_element); /// We don't care about possible duplicates here, normalizeProfileElements() will remove duplicates.
};
for (const auto & add : changes.add_settings)
{
if (add.parent_profile)
apply_add_profile(*add.parent_profile);
if (!add.setting_name.empty())
apply_add_setting(add);
}
/// Apply "MODIFY" changes.
for (const auto & modify : changes.modify_settings)
{
chassert(!modify.parent_profile); /// There is no such thing as "MODIFY PROFILE".
if (!modify.setting_name.empty())
apply_modify_setting(modify);
}
/// Remove empty elements and duplicates, and sort the result.
normalize();
}
}

View File

@ -13,8 +13,10 @@ namespace DB
struct Settings;
class SettingsChanges;
class SettingsConstraints;
struct AlterSettingsProfileElements;
class ASTSettingsProfileElement;
class ASTSettingsProfileElements;
class ASTAlterSettingsProfileElements;
class AccessControl;
@ -44,6 +46,8 @@ struct SettingsProfileElement
std::shared_ptr<ASTSettingsProfileElement> toAST() const;
std::shared_ptr<ASTSettingsProfileElement> toASTWithNames(const AccessControl & access_control) const;
bool empty() const { return !parent_profile && (setting_name.empty() || (!value && !min_value && !max_value && !writability)); }
bool isConstraint() const;
private:
@ -57,8 +61,9 @@ public:
SettingsProfileElements() = default;
/// The constructor from AST requires the AccessControl if `ast.id_mode == false`.
SettingsProfileElements(const ASTSettingsProfileElements & ast); /// NOLINT
SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control);
SettingsProfileElements(const ASTSettingsProfileElements & ast, bool normalize_ = true); /// NOLINT
SettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control, bool normalize_ = true);
std::shared_ptr<ASTSettingsProfileElements> toAST() const;
std::shared_ptr<ASTSettingsProfileElements> toASTWithNames(const AccessControl & access_control) const;
@ -70,16 +75,41 @@ public:
void removeSettingsKeepProfiles();
void merge(const SettingsProfileElements & other);
Settings toSettings() const;
SettingsChanges toSettingsChanges() const;
SettingsConstraints toSettingsConstraints(const AccessControl & access_control) const;
std::vector<UUID> toProfileIDs() const;
bool isBackupAllowed() const;
/// Normalizes this list of profile elements: removes duplicates and empty elements, and also sorts the elements
/// in the following order: first profiles, then settings.
/// The function is called automatically after parsing profile elements from an AST and
/// at the end of an "ALTER PROFILE (USER/ROLE)" command.
void normalize();
/// Appends all the elements of another list of profile elements to this list.
void merge(const SettingsProfileElements & other, bool normalize_ = true);
/// Applies changes from an "ALTER PROFILE (USER/ROLE)" command. Always normalizes the result.
void applyChanges(const AlterSettingsProfileElements & changes);
bool isBackupAllowed() const;
static bool isAllowBackupSetting(const String & setting_name);
};
struct AlterSettingsProfileElements
{
bool drop_all_settings = false;
bool drop_all_profiles = false;
SettingsProfileElements add_settings;
SettingsProfileElements modify_settings;
SettingsProfileElements drop_settings;
AlterSettingsProfileElements() = default;
explicit AlterSettingsProfileElements(const SettingsProfileElements & ast);
explicit AlterSettingsProfileElements(const ASTSettingsProfileElements & ast);
explicit AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast);
AlterSettingsProfileElements(const ASTSettingsProfileElements & ast, const AccessControl & access_control);
AlterSettingsProfileElements(const ASTAlterSettingsProfileElements & ast, const AccessControl & access_control);
};
}

View File

@ -135,8 +135,8 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
merged_settings.emplace_back(new_element);
}
merged_settings.merge(enabled.params.settings_from_enabled_roles);
merged_settings.merge(enabled.params.settings_from_user);
merged_settings.merge(enabled.params.settings_from_enabled_roles, /* normalize= */ false);
merged_settings.merge(enabled.params.settings_from_user, /* normalize= */ false);
auto info = std::make_shared<SettingsProfilesInfo>(access_control);

View File

@ -10,7 +10,6 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionSum.h>
#include <Core/DecimalFunctions.h>
#include <Core/IResolvedFunction.h>
#include "config.h"
@ -141,6 +140,9 @@ public:
bool isCompilable() const override
{
if constexpr (!canBeNativeType<Numerator>() || !canBeNativeType<Denominator>())
return false;
bool can_be_compiled = true;
for (const auto & argument : this->argument_types)
@ -158,7 +160,8 @@ public:
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), sizeof(Fraction), llvm::assumeAligned(this->alignOfData()));
}
void compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
void compileMergeImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -185,7 +188,15 @@ public:
b.CreateStore(denominator_result_value, denominator_dst_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
void
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileMergeImpl(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
llvm::Value * compileGetResultImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -204,6 +215,13 @@ public:
return b.CreateFDiv(double_numerator, double_denominator);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
return compileGetResultImpl(builder, aggregate_data_ptr);
return nullptr;
}
#endif
private:
@ -308,7 +326,8 @@ public:
#if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
void compileAddImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const
requires(canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -327,6 +346,12 @@ public:
b.CreateStore(denominator_value_updated, denominator_ptr);
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
if constexpr (canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileAddImpl(builder, aggregate_data_ptr, arguments);
}
#endif
private:

View File

@ -59,13 +59,13 @@ public:
bool isCompilable() const override
{
bool can_be_compiled = Base::isCompilable();
can_be_compiled &= canBeNativeType<Weight>();
return can_be_compiled;
if constexpr (!canBeNativeType<Weight>() || !canBeNativeType<Numerator>() || !canBeNativeType<Denominator>())
return false;
return Base::isCompilable();
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
void compileAddImpl(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const
requires(canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
{
llvm::IRBuilder<> & b = static_cast<llvm::IRBuilder<> &>(builder);
@ -94,6 +94,26 @@ public:
b.CreateStore(denominator_value_updated, denominator_ptr);
}
void
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
Base::compileMergeImpl(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
}
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
return Base::compileGetResultImpl(builder, aggregate_data_ptr);
return nullptr;
}
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
{
if constexpr (canBeNativeType<Weight>() && canBeNativeType<Numerator>() && canBeNativeType<Denominator>())
compileAddImpl(builder, aggregate_data_ptr, arguments);
}
#endif
};
@ -104,7 +124,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
constexpr auto allow = [](WhichDataType t)
{
return t.isInt() || t.isUInt() || t.isFloat();
return t.isInt() || t.isUInt() || t.isNativeFloat();
};
return allow(l_dt) && allow(r_dt);

View File

@ -1,12 +1,13 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTFunction.h>
#include <Common/CurrentThread.h>
#include <Core/Settings.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -349,4 +350,9 @@ AggregateFunctionFactory & AggregateFunctionFactory::instance()
return ret;
}
bool AggregateUtils::isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
}

View File

@ -1,7 +1,6 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>
@ -23,6 +22,8 @@ class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
using DataTypes = std::vector<DataTypePtr>;
class ASTFunction;
/**
* The invoker has arguments: name of aggregate function, types of arguments, values of parameters.
* Parameters are for "parametric" aggregate functions.
@ -114,10 +115,7 @@ private:
struct AggregateUtils
{
static bool isAggregateFunction(const ASTFunction & node)
{
return AggregateFunctionFactory::instance().isAggregateFunctionName(node.name);
}
static bool isAggregateFunction(const ASTFunction & node);
};
const String & getAggregateFunctionCanonicalNameIfAny(const String & name);

View File

@ -1,27 +1,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <AggregateFunctions/AggregateFunctionGroupConcat.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
@ -33,209 +13,190 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
namespace
void GroupConcatDataBase::checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
struct GroupConcatDataBase
void GroupConcatDataBase::insertChar(const char * str, UInt64 str_size, Arena * arena)
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
void checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
void GroupConcatDataBase::insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, FormatSettings{});
auto string = buff.stringView();
insertChar(string.data(), string.size(), arena);
}
void insertChar(const char * str, UInt64 str_size, Arena * arena)
{
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
UInt64 GroupConcatData::getSize(size_t i) const
{
return offsets[i * 2 + 1] - offsets[i * 2];
}
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, FormatSettings{});
auto string = buff.stringView();
insertChar(string.data(), string.size(), arena);
}
UInt64 GroupConcatData::getString(size_t i) const
{
return offsets[i * 2];
}
};
void GroupConcatData::insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
template <bool has_limit>
struct GroupConcatData;
template<>
struct GroupConcatData<false> final : public GroupConcatDataBase
GroupConcatImpl<has_limit>::GroupConcatImpl(
const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, limit(limit_)
, delimiter(delimiter_)
, type(data_type_)
{
};
template<>
struct GroupConcatData<true> final : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
UInt64 getString(size_t i) const { return offsets[i * 2]; }
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
};
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
}
template <bool has_limit>
class GroupConcatImpl final
: public IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>
String GroupConcatImpl<has_limit>::getName() const
{
static constexpr auto name = "groupConcat";
return name;
}
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
const DataTypePtr type;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, limit(limit_)
, delimiter(delimiter_)
, type(data_type_)
{
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::add(
AggregateDataPtr __restrict place,
const IColumn ** columns,
size_t row_num,
Arena * arena) const
{
auto & cur_data = this->data(place);
String getName() const override { return name; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_data = this->data(place);
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
if (isFixedString(type))
{
ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"};
const auto & col_str = castColumn(col, std::make_shared<DataTypeString>());
cur_data.insert(col_str.get(), serialization, row_num, arena);
}
else
cur_data.insert(columns[0], serialization, row_num, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
else
if (isFixedString(type))
{
ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"};
const auto & col_str = castColumn(col, std::make_shared<DataTypeString>());
cur_data.insert(col_str.get(), serialization, row_num, arena);
}
else
cur_data.insert(columns[0], serialization, row_num, arena);
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
    if (rhs_data.data_size == 0)
        return;

    if constexpr (has_limit)
    {
        UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
        for (UInt64 i = 0; i < new_elems_count; ++i)
        {
            if (cur_data.data_size != 0)
                cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);

            cur_data.offsets.push_back(cur_data.data_size, arena);
            cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
            cur_data.num_rows++;
            cur_data.offsets.push_back(cur_data.data_size, arena);
        }
    }
    else
    {
        if (cur_data.data_size != 0)
            cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);

        cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
    }
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const
{
    auto & cur_data = this->data(place);

    writeVarUInt(cur_data.data_size, buf);
    buf.write(cur_data.data, cur_data.data_size);

    if constexpr (has_limit)
    {
        writeVarUInt(cur_data.num_rows, buf);
        for (const auto & offset : cur_data.offsets)
            writeVarUInt(offset, buf);
    }
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const
{
    auto & cur_data = this->data(place);

    UInt64 temp_size = 0;
    readVarUInt(temp_size, buf);

    cur_data.checkAndUpdateSize(temp_size, arena);
    buf.readStrict(cur_data.data + cur_data.data_size, temp_size);
    cur_data.data_size = temp_size;

    if constexpr (has_limit)
    {
        readVarUInt(cur_data.num_rows, buf);
        cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
        for (auto & offset : cur_data.offsets)
            readVarUInt(offset, buf);
    }
}
template <bool has_limit>
void GroupConcatImpl<has_limit>::insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const
{
    auto & cur_data = this->data(place);

    if (cur_data.data_size == 0)
    {
        to.insertDefault();
        return;
    }

    auto & column_string = assert_cast<ColumnString &>(to);
    column_string.insertData(cur_data.data, cur_data.data_size);
}
template <bool has_limit>
bool GroupConcatImpl<has_limit>::allocatesMemoryInArena() const { return true; }
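For reference, the serialized state above is a varint-framed layout: blob length, blob bytes, and, for the limited variant only, the row count followed by the flat offset array. Below is a minimal standalone sketch of that framing; putVarUInt and frameState are hypothetical names, and the hand-rolled LEB128 writer stands in for ClickHouse's writeVarUInt.

#include <cstdint>
#include <string>
#include <vector>

// LEB128-style varint, the same 7-bits-per-byte scheme writeVarUInt uses.
static void putVarUInt(uint64_t x, std::string & out)
{
    while (x >= 0x80)
    {
        out.push_back(static_cast<char>(0x80 | (x & 0x7F)));
        x >>= 7;
    }
    out.push_back(static_cast<char>(x));
}

// Frame a limited groupConcat state: blob size, blob bytes, row count,
// then one begin/end offset pair per row (num_rows * 2 varints).
static std::string frameState(const std::string & blob, const std::vector<uint64_t> & offsets)
{
    std::string buf;
    putVarUInt(blob.size(), buf);
    buf += blob;
    putVarUInt(offsets.size() / 2, buf); // num_rows
    for (uint64_t offset : offsets)
        putVarUInt(offset, buf);
    return buf;
}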
AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
@ -278,14 +239,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}
}
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::Case::Insensitive);
factory.registerAlias(GroupConcatImpl<false>::getNameAndAliases().at(1), GroupConcatImpl<false>::getNameAndAliases().at(0), AggregateFunctionFactory::Case::Insensitive);
}
}

View File

@ -0,0 +1,78 @@
#pragma once
#ifndef DB_GROUP_CONCAT_H
#define DB_GROUP_CONCAT_H
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Core/ServerSettings.h>
#include <Common/ArenaAllocator.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
namespace DB
{
struct Settings;
struct GroupConcatDataBase
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
void checkAndUpdateSize(UInt64 add, Arena * arena);
void insertChar(const char * str, UInt64 str_size, Arena * arena);
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena);
};
struct GroupConcatData : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const;
UInt64 getString(size_t i) const;
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena);
};
template <bool has_limit>
class GroupConcatImpl : public IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>
{
static constexpr auto name = "groupConcat";
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
const DataTypePtr type;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_);
String getName() const override;
static const std::vector<std::string>& getNameAndAliases()
{
static const std::vector<std::string> aliases = {"groupConcat", "group_concat"};
return aliases;
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override;
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override;
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf, std::optional<size_t> version) const override;
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> version, Arena * arena) const override;
void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override;
bool allocatesMemoryInArena() const override;
};
}
#endif
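One note on the declarations above: offsets appears to be stored flat with two entries per row, the begin and end positions of each value inside data, which is why deserialization resizes it to num_rows * 2. A toy sketch of how getString and getSize would read that layout; the pair layout is an assumption inferred from the merge and deserialize code, not a documented contract.

#include <cstddef>
#include <cstdint>
#include <vector>

// Assumed flat pair layout: [begin_0, end_0, begin_1, end_1, ...]
struct OffsetsView
{
    const std::vector<uint64_t> & offsets;

    // Start position of the i-th value inside the data blob.
    uint64_t getString(size_t i) const { return offsets[i * 2]; }

    // Length of the i-th value.
    uint64_t getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
};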

View File

@ -8,7 +8,6 @@
#include <Interpreters/AggregationCommon.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SipHash.h>
#include <IO/ReadHelpersArena.h>

View File

@ -2,7 +2,6 @@
#include <Core/Field.h>
#include <DataTypes/IDataType.h>
#include <IO/WriteHelpers.h>
namespace DB

View File

@ -14,6 +14,7 @@
#include <Common/Exception.h>
#include <Common/ThreadPool_fwd.h>
#include <IO/ReadBuffer.h>
#include "config.h"
#include <cstddef>
@ -176,11 +177,15 @@ public:
/// Serializes state (to transmit it over the network, for example).
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
/// Devirtualize serialize call.
virtual void serializeBatch(const PaddedPODArray<AggregateDataPtr> & data, size_t start, size_t size, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
/// Deserializes state. This function is called only for empty (just created) states.
virtual void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version = std::nullopt, Arena * arena = nullptr) const = 0; /// NOLINT
/// Devirtualize create and deserialize calls. Used in deserialization of ColumnAggregateFunction.
virtual void createAndDeserializeBatch(PaddedPODArray<AggregateDataPtr> & data, AggregateDataPtr __restrict place, size_t total_size_of_state, size_t limit, ReadBuffer & buf, std::optional<size_t> version, Arena * arena) const = 0;
/// Returns true if a function requires Arena to handle own states (see add(), merge(), deserialize()).
virtual bool allocatesMemoryInArena() const = 0;
@ -479,6 +484,37 @@ public:
static_cast<const Derived *>(this)->serialize(data[i], buf, version);
}
void createAndDeserializeBatch(
PaddedPODArray<AggregateDataPtr> & data,
AggregateDataPtr __restrict place,
size_t total_size_of_state,
size_t limit,
ReadBuffer & buf,
std::optional<size_t> version,
Arena * arena) const override
{
for (size_t i = 0; i < limit; ++i)
{
if (buf.eof())
break;
static_cast<const Derived *>(this)->create(place);
try
{
static_cast<const Derived *>(this)->deserialize(place, buf, version, arena);
}
catch (...)
{
static_cast<const Derived *>(this)->destroy(place);
throw;
}
data.push_back(place);
place += total_size_of_state;
}
}
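The point of serializeBatch and createAndDeserializeBatch is devirtualization: the helper is a CRTP base, so static_cast<const Derived *>(this) lets the compiler resolve and inline the per-state call inside the loop instead of paying one virtual dispatch per row. A reduced, self-contained sketch of the pattern with toy names (BatchHelper and serializeOne are illustrative, not the real interface):

#include <cstddef>
#include <cstdio>

template <typename Derived>
struct BatchHelper
{
    // One outer call covers the whole batch; each element call binds statically.
    void serializeBatch(const int * states, size_t size) const
    {
        for (size_t i = 0; i < size; ++i)
            static_cast<const Derived *>(this)->serializeOne(states[i]);
    }
};

struct SumFunction : BatchHelper<SumFunction>
{
    void serializeOne(int state) const { std::printf("%d\n", state); }
};

int main()
{
    int states[] = {1, 2, 3};
    SumFunction{}.serializeBatch(states, 3);
    return 0;
}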
void addBatchSparse(
size_t row_begin,
size_t row_end,

View File

@ -8,6 +8,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/assert_cast.h>
#include <Common/SipHash.h>
namespace DB
{
@ -66,10 +67,7 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
{
if (const auto * function_node = column_node->getExpression()->as<FunctionNode>(); function_node && function_node->getFunctionName() == "nested")
array_join_expression_ast = array_join_expression->toAST(options);
else
array_join_expression_ast = column_node->getExpression()->toAST(options);
array_join_expression_ast = column_node->getExpression()->toAST(options);
}
else
array_join_expression_ast = array_join_expression->toAST(options);

View File

@ -9,6 +9,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Common/assert_cast.h>
#include <Common/SipHash.h>
namespace DB
{

View File

@ -3,12 +3,8 @@
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Common/SipHash.h>
#include <Interpreters/Context_fwd.h>
#include <unordered_set>
namespace DB::Analyzer
{

View File

@ -1,6 +1,7 @@
#include <Analyzer/QueryTreeBuilder.h>
#include <Common/FieldVisitorToString.h>
#include <Common/quoteString.h>
#include <DataTypes/FieldToDataType.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
@ -40,6 +41,8 @@
#include <Analyzer/JoinNode.h>
#include <Analyzer/UnionNode.h>
#include <AggregateFunctions/AggregateFunctionGroupConcat.h>
#include <Core/Settings.h>
#include <Databases/IDatabase.h>
@ -70,6 +73,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_QUERY_PARAMETER;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
@ -122,6 +126,8 @@ private:
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, const ContextPtr & context) const;
QueryTreeNodePtr setFirstArgumentAsParameter(const ASTFunction * function, const ContextPtr & context) const;
ASTPtr query;
QueryTreeNodePtr query_tree_node;
};
@ -643,32 +649,43 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
}
else
{
const auto & group_concat_aliases = GroupConcatImpl<false>::getNameAndAliases();
if (!function->name.empty()
    && std::any_of(
        group_concat_aliases.begin(), group_concat_aliases.end(),
        [&](const std::string & s) { return Poco::toLower(s) == Poco::toLower(function->name); })
    && function->arguments && function->arguments->children.size() == 2)
{
    result = setFirstArgumentAsParameter(function, context);
}
else
{
    auto function_node = std::make_shared<FunctionNode>(function->name);
    function_node->setNullsAction(function->nulls_action);

    if (function->parameters)
    {
        const auto & function_parameters_list = function->parameters->as<ASTExpressionList>()->children;
        for (const auto & argument : function_parameters_list)
            function_node->getParameters().getNodes().push_back(buildExpression(argument, context));
    }

    if (function->arguments)
    {
        const auto & function_arguments_list = function->arguments->as<ASTExpressionList>()->children;
        for (const auto & argument : function_arguments_list)
            function_node->getArguments().getNodes().push_back(buildExpression(argument, context));
    }

    if (function->is_window_function)
    {
        if (function->window_definition)
            function_node->getWindowNode() = buildWindow(function->window_definition, context);
        else
            function_node->getWindowNode() = std::make_shared<IdentifierNode>(Identifier(function->window_name));
    }

    result = std::move(function_node);
}
}
}
else if (const auto * subquery = expression->as<ASTSubquery>())
@ -1071,4 +1088,40 @@ QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context)
return builder.getQueryTreeNode();
}
QueryTreeNodePtr QueryTreeBuilder::setFirstArgumentAsParameter(const ASTFunction * function, const ContextPtr & context) const
{
const auto * first_arg_ast = function->arguments->children[0].get();
const auto * first_arg_literal = first_arg_ast->as<ASTLiteral>();
if (!first_arg_literal)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"If groupConcat is used with two arguments, the first argument must be a constant String");
}
if (first_arg_literal->value.getType() != Field::Types::String)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"If groupConcat is used with two arguments, the first argument must be a constant String");
}
ASTPtr second_arg = function->arguments->children[1]->clone();
auto function_node = std::make_shared<FunctionNode>(function->name);
function_node->setNullsAction(function->nulls_action);
function_node->getParameters().getNodes().push_back(buildExpression(function->arguments->children[0], context)); // Separator
function_node->getArguments().getNodes().push_back(buildExpression(second_arg, context)); // Column to concatenate
if (function->is_window_function)
{
if (function->window_definition)
function_node->getWindowNode() = buildWindow(function->window_definition, context);
else
function_node->getWindowNode() = std::make_shared<IdentifierNode>(Identifier(function->window_name));
}
return std::move(function_node);
}
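In effect, setFirstArgumentAsParameter turns the two-argument call form into the parameterized form: groupConcat(', ', name) is rebuilt as groupConcat(', ')(name), with the constant separator moved into the function's parameters. A toy model of that rewrite on a simplified call node (the Call struct and its field names are hypothetical, for illustration only):

#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for an ASTFunction: parameters are the part in
// f(<parameters>)(<arguments>).
struct Call
{
    std::string name;
    std::vector<std::string> parameters;
    std::vector<std::string> arguments;
};

// groupConcat(sep, col) -> groupConcat(sep)(col); only legal when the
// separator is a constant string, mirroring the checks above.
Call setFirstArgumentAsParameter(const Call & in)
{
    if (in.arguments.size() != 2)
        throw std::invalid_argument("expected exactly two arguments");

    Call out;
    out.name = in.name;
    out.parameters.push_back(in.arguments[0]); // separator
    out.arguments.push_back(in.arguments[1]);  // column to concatenate
    return out;
}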
}

View File

@ -165,6 +165,9 @@ struct IdentifierResolveScope
/// Table expression node to data
std::unordered_map<QueryTreeNodePtr, AnalysisTableExpressionData> table_expression_node_to_data;
/// Table expression nodes that appear in the join tree of the corresponding query
std::unordered_set<QueryTreeNodePtr> registered_table_expression_nodes;
QueryTreeNodePtrWithHashIgnoreTypesSet nullable_group_by_keys;
/// Here we count the number of nullable GROUP BY keys we met resolving expression.
/// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube`

View File

@ -393,7 +393,7 @@ QueryTreeNodePtr IdentifierResolver::wrapExpressionNodeInTupleElement(QueryTreeN
/// Resolve identifier functions implementation
/// Try resolve table identifier from database catalog
QueryTreeNodePtr IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context)
std::shared_ptr<TableNode> IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context)
{
size_t parts_size = table_identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
@ -443,6 +443,11 @@ QueryTreeNodePtr IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalo
if (!storage)
return {};
if (storage->hasExternalDynamicMetadata())
{
storage->updateExternalDynamicMetadata(context);
}
if (!storage_lock)
storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef()[Setting::lock_acquire_timeout]);
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
@ -565,7 +570,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromExpressionArguments
bool IdentifierResolver::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope)
{
return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr || scope.aliases.array_join_aliases.contains(identifier_lookup.identifier.front());
return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr;
}
/** Resolve identifier from table columns.
@ -680,6 +685,27 @@ bool IdentifierResolver::tryBindIdentifierToTableExpressions(const IdentifierLoo
return can_bind_identifier_to_table_expression;
}
bool IdentifierResolver::tryBindIdentifierToArrayJoinExpressions(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope)
{
bool result = false;
for (const auto & table_expression : scope.registered_table_expression_nodes)
{
auto * array_join_node = table_expression->as<ArrayJoinNode>();
if (!array_join_node)
continue;
for (const auto & array_join_expression : array_join_node->getJoinExpressions())
{
auto array_join_expression_alias = array_join_expression->getAlias();
if (identifier_lookup.identifier.front() == array_join_expression_alias)
return true;
}
}
return result;
}
QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
@ -1415,9 +1441,6 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromArrayJoin(const Ide
IdentifierView identifier_view(identifier_lookup.identifier);
if (identifier_view.isCompound() && from_array_join_node.hasAlias() && identifier_view.front() == from_array_join_node.getAlias())
identifier_view.popFirst();
const auto & alias_or_name = array_join_column_expression_typed.hasAlias()
? array_join_column_expression_typed.getAlias()
: array_join_column_expression_typed.getColumnName();
@ -1429,18 +1452,16 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromArrayJoin(const Ide
else
continue;
auto array_join_column = std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(),
    array_join_column_expression_typed.getColumnSource());

if (identifier_view.empty())
    return array_join_column;

/// Resolve subcolumns. Example : SELECT x.y.z FROM tab ARRAY JOIN arr AS x
auto compound_expr = tryResolveIdentifierFromCompoundExpression(
    identifier_lookup.identifier,
    identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/,
    array_join_column,
    {} /* compound_expression_source */,
    scope,
    true /* can_be_not_found */);

View File

@ -21,6 +21,7 @@ class QueryExpressionsAliasVisitor ;
class QueryNode;
class JoinNode;
class ColumnNode;
class TableNode;
using ProjectionName = String;
using ProjectionNames = std::vector<ProjectionName>;
@ -86,7 +87,7 @@ public:
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
static std::shared_ptr<TableNode> tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
size_t identifier_bind_size,
@ -109,6 +110,9 @@ public:
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
static bool tryBindIdentifierToArrayJoinExpressions(const IdentifierLookup & identifier_lookup,
const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);

View File

@ -1,6 +1,6 @@
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Resolve/QueryAnalyzer.h>
#include <Analyzer/createUniqueTableAliases.h>
#include <Analyzer/createUniqueAliasesIfNecessary.h>
namespace DB
{
@ -16,7 +16,7 @@ void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr conte
{
QueryAnalyzer analyzer(only_analyze);
analyzer.resolve(query_tree_node, table_expression, context);
createUniqueTableAliases(query_tree_node, table_expression, context);
createUniqueAliasesIfNecessary(query_tree_node, context);
}
}

View File

@ -1593,7 +1593,7 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes &
if (need_to_qualify)
need_to_qualify = IdentifierResolver::tryBindIdentifierToTableExpressions(identifier_lookup, table_expression_node, scope);
if (IdentifierResolver::tryBindIdentifierToAliases(identifier_lookup, scope))
if (IdentifierResolver::tryBindIdentifierToAliases(identifier_lookup, scope) || IdentifierResolver::tryBindIdentifierToArrayJoinExpressions(identifier_lookup, scope))
need_to_qualify = true;
if (need_to_qualify)
@ -3470,11 +3470,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto set = std::make_shared<Set>(size_limits_for_set, 0, settings[Setting::transform_null_in]);
set->setHeader(result_block.cloneEmpty().getColumnsWithTypeAndName());
set->insertFromBlock(result_block.getColumnsWithTypeAndName());
set->finishInsert();
auto future_set = std::make_shared<FutureSetFromStorage>(std::move(set));
auto hash = function_arguments[1]->getTreeHash();
auto future_set = std::make_shared<FutureSetFromTuple>(hash, std::move(result_block), settings[Setting::transform_null_in], size_limits_for_set);
/// Create constant set column for constant folding
@ -4977,6 +4974,16 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"ARRAY JOIN requires at least single expression");
/// Register expression aliases in the scope
for (const auto & elem : array_join_nodes)
{
for (auto & child : elem->getChildren())
{
if (child)
expressions_visitor.visit(child);
}
}
std::vector<QueryTreeNodePtr> array_join_column_expressions;
array_join_column_expressions.reserve(array_join_nodes_size);
@ -4984,18 +4991,6 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
{
auto array_join_expression_alias = array_join_expression->getAlias();
for (const auto & elem : array_join_nodes)
{
if (elem->hasAlias())
scope.aliases.array_join_aliases.insert(elem->getAlias());
for (auto & child : elem->getChildren())
{
if (child)
expressions_visitor.visit(child);
}
}
std::string identifier_full_name;
if (auto * identifier_node = array_join_expression->as<IdentifierNode>())
@ -5368,6 +5363,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
};
add_table_expression_alias_into_scope(join_tree_node);
scope.registered_table_expression_nodes.insert(join_tree_node);
scope.table_expressions_in_resolve_process.erase(join_tree_node.get());
}

View File

@ -27,10 +27,6 @@ struct ScopeAliases
std::unordered_set<QueryTreeNodePtr> nodes_with_duplicated_aliases;
std::vector<QueryTreeNodePtr> cloned_nodes_with_duplicated_aliases;
/// Names which are aliases from ARRAY JOIN.
/// This is needed to properly qualify columns from matchers and avoid name collision.
std::unordered_set<std::string> array_join_aliases;
std::unordered_map<std::string, QueryTreeNodePtr> & getAliasMap(IdentifierLookupContext lookup_context)
{
switch (lookup_context)

View File

@ -62,7 +62,7 @@ size_t getCompoundTypeDepth(const IDataType & type)
}
template <typename Collection>
Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & block_types, bool transform_null_in)
ColumnsWithTypeAndName createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & block_types, bool transform_null_in)
{
assert(collection.size() == value_types.size());
size_t columns_size = block_types.size();
@ -132,16 +132,19 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes&
columns[i]->insert(tuple_values[i]);
}
Block res;
ColumnsWithTypeAndName res(columns_size);
for (size_t i = 0; i < columns_size; ++i)
res.insert(ColumnWithTypeAndName{std::move(columns[i]), block_types[i], "argument_" + toString(i)});
{
res[i].type = block_types[i];
res[i].column = std::move(columns[i]);
}
return res;
}
}
Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in)
ColumnsWithTypeAndName getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in)
{
DataTypes set_element_types = {expression_type};
const auto * lhs_tuple_type = typeid_cast<const DataTypeTuple *>(expression_type.get());
@ -158,7 +161,7 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const
size_t lhs_type_depth = getCompoundTypeDepth(*expression_type);
size_t rhs_type_depth = getCompoundTypeDepth(*value_type);
Block result_block;
ColumnsWithTypeAndName result_block;
if (lhs_type_depth == rhs_type_depth)
{

View File

@ -19,6 +19,6 @@ using SetPtr = std::shared_ptr<Set>;
* Example: SELECT id FROM test_table WHERE id IN (1, 2, 3, 4);
* Example: SELECT id FROM test_table WHERE id IN ((1, 2), (3, 4));
*/
Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in);
ColumnsWithTypeAndName getSetElementsForConstantValue(const DataTypePtr & expression_type, const Field & value, const DataTypePtr & value_type, bool transform_null_in);
}

View File

@ -1,5 +1,7 @@
#include <Analyzer/TableFunctionNode.h>
#include <Common/SipHash.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>

View File

@ -11,6 +11,7 @@
#include <Interpreters/Context.h>
#include <Core/Settings.h>
#include <Common/SipHash.h>
namespace DB
{

View File

@ -1,6 +1,7 @@
#include <memory>
#include <unordered_map>
#include <Analyzer/createUniqueTableAliases.h>
#include <Analyzer/createUniqueAliasesIfNecessary.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/IQueryTreeNode.h>
@ -48,8 +49,6 @@ public:
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
[[fallthrough]];
case QueryTreeNodeType::ARRAY_JOIN:
{
auto & alias = table_expression_to_alias[node];
if (alias.empty())
@ -60,6 +59,12 @@ public:
}
break;
}
case QueryTreeNodeType::ARRAY_JOIN:
{
/// Simulate previous behaviour and preserve table naming with previous versions
++next_id;
break;
}
default:
break;
}
@ -130,12 +135,97 @@ private:
std::unordered_map<QueryTreeNodePtr, String> table_expression_to_alias;
};
}
void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr & /*table_expression*/, const ContextPtr & context)
class CreateUniqueArrayJoinAliasesVisitor : public InDepthQueryTreeVisitorWithContext<CreateUniqueArrayJoinAliasesVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<CreateUniqueArrayJoinAliasesVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (auto * array_join_typed = node->as<ArrayJoinNode>())
{
populateRenamingMap(array_join_typed, renaming[array_join_typed]);
return;
}
auto * column_node = node->as<ColumnNode>();
if (!column_node || replaced_nodes_set.contains(node))
return;
auto column_source = column_node->getColumnSource();
auto * array_join = column_source->as<ArrayJoinNode>();
if (!array_join)
return;
auto & renaming_map = getRenamingMap(array_join);
auto new_column = column_node->getColumn();
new_column.name = renaming_map[column_node->getColumnName()];
auto new_column_node = std::make_shared<ColumnNode>(new_column, column_source);
node = std::move(new_column_node);
replaced_nodes_set.insert(node);
}
private:
using RenamingMap = std::unordered_map<String, String>;
void populateRenamingMap(ArrayJoinNode * array_join, RenamingMap & result)
{
if (result.empty())
{
for (auto & array_join_expression : array_join->getJoinExpressions())
{
auto * array_join_column = array_join_expression->as<ColumnNode>();
chassert(array_join_column != nullptr);
String unique_expression_name = fmt::format("__array_join_exp_{}", ++next_id);
result.emplace(array_join_column->getColumnName(), unique_expression_name);
auto replacement_column = array_join_column->getColumn();
replacement_column.name = unique_expression_name;
auto replacement_column_node = std::make_shared<ColumnNode>(replacement_column, array_join_column->getExpression(), array_join_column->getColumnSource());
replacement_column_node->setAlias(unique_expression_name);
array_join_expression = std::move(replacement_column_node);
replaced_nodes_set.insert(array_join_expression);
}
}
}
RenamingMap & getRenamingMap(ArrayJoinNode * array_join)
{
auto & result = renaming[array_join];
populateRenamingMap(array_join, result);
return result;
}
size_t next_id = 0;
std::unordered_map<ArrayJoinNode *, RenamingMap> renaming;
// TODO: Remove this field when identifier resolution cache removed from analyzer.
std::unordered_set<QueryTreeNodePtr> replaced_nodes_set;
};
}
void createUniqueAliasesIfNecessary(QueryTreeNodePtr & node, const ContextPtr & context)
{
/*
* For each table expression in the Query Tree generate and add a unique alias.
* If table expression had an alias in initial query tree, override it.
*/
CreateUniqueTableAliasesVisitor(context).visit(node);
/* Generate unique aliases for array join expressions.
* It's required to create a valid AST for distributed query.
*/
CreateUniqueArrayJoinAliasesVisitor(context).visit(node);
}
}
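CreateUniqueArrayJoinAliasesVisitor above boils down to a memoized rename: each ARRAY JOIN expression gets one stable synthetic alias of the form __array_join_exp_<N> the first time it is seen, and later column references are rewritten through the same map. A condensed sketch of that bookkeeping with standalone toy types (AliasRegistry is a hypothetical name):

#include <cstddef>
#include <string>
#include <unordered_map>

// Mint one stable "__array_join_exp_<N>" alias per original expression name;
// repeated lookups return the alias created on first sight.
class AliasRegistry
{
public:
    const std::string & aliasFor(const std::string & column_name)
    {
        auto [it, inserted] = renaming.try_emplace(column_name);
        if (inserted)
            it->second = "__array_join_exp_" + std::to_string(++next_id);
        return it->second;
    }

private:
    std::unordered_map<std::string, std::string> renaming;
    std::size_t next_id = 0;
};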

View File

@ -0,0 +1,14 @@
#pragma once
#include <memory>
#include <Interpreters/Context_fwd.h>
class IQueryTreeNode;
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
namespace DB
{
void createUniqueAliasesIfNecessary(QueryTreeNodePtr & node, const ContextPtr & context);
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <memory>
#include <Interpreters/Context_fwd.h>
class IQueryTreeNode;
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
namespace DB
{
/*
* For each table expression in the Query Tree generate and add a unique alias.
* If table expression had an alias in initial query tree, override it.
*/
void createUniqueTableAliases(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, const ContextPtr & context);
}

View File

@ -3,9 +3,6 @@
#include <Common/Exception.h>
#include <Common/ZooKeeper/ZooKeeperRetries.h>
#include <Common/logger_useful.h>
#include <Common/quoteString.h>
#include <fmt/format.h>
namespace DB
{

View File

@ -692,7 +692,8 @@ void BackupCoordinationStageSync::cancelQueryIfError()
if (!exception)
return;
process_list_element->cancelQuery(false, exception);
process_list_element->cancelQuery(CancelReason::CANCELLED_BY_USER, exception);
state_changed.notify_all();
}
@ -746,7 +747,8 @@ void BackupCoordinationStageSync::cancelQueryIfDisconnectedTooLong()
/// we don't want the watching thread to try waiting here for retries or a reconnection).
/// Also we don't set the `state.host_with_error` field here because `state.host_with_error` can only be set
/// AFTER creating the 'error' node (see the comment for `State`).
process_list_element->cancelQuery(false, exception);
process_list_element->cancelQuery(CancelReason::CANCELLED_BY_USER, exception);
state_changed.notify_all();
}

View File

@ -6,6 +6,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/DDLAdjustingForBackupVisitor.h>
#include <Backups/IBackupCoordination.h>
#include <Common/quoteString.h>
#include <Databases/IDatabase.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>

View File

@ -24,7 +24,6 @@ namespace DB
{
namespace ErrorCodes
{
extern const int AZURE_BLOB_STORAGE_ERROR;
extern const int LOGICAL_ERROR;
}
@ -234,11 +233,8 @@ bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
{
String key = fs::path(blob_path) / file_name;
RelativePathsWithMetadata children;
object_storage->listObjects(key,children,/*max_keys*/0);
if (children.empty())
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
return children[0]->metadata->size_bytes;
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
return object_metadata.size_bytes;
}
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)

Some files were not shown because too many files have changed in this diff.