diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4e0484ab1..884d5be42de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,14 +187,6 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () -if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # Can be lld or ld-lld or lld-13 or /path/to/lld. - if (LINKER_NAME MATCHES "lld") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") - message (STATUS "Adding .gdb-index via --gdb-index linker option.") - endif () -endif() - if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE) AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" @@ -402,7 +394,7 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(ENABLE_GWP_ASAN OFF) endif () -option (ENABLE_FIU "Enable Fiu" ON) +option (ENABLE_LIBFIU "Enable libfiu" ON) option(WERROR "Enable -Werror compiler option" ON) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index eb3afe0ccdf..b33e7083e32 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -179,7 +179,7 @@ else() message(STATUS "Not using QPL") endif () -if (OS_LINUX AND ARCH_AMD64) +if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QATLIB) message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt index 72d21a8572b..fc18092f574 100644 --- a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt +++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt @@ -27,7 +27,7 @@ if (ENABLE_QAT_OUT_OF_TREE_BUILD) ${QAT_AL_INCLUDE_DIR} ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) else () # In-tree build message(STATUS "Intel QATZSTD in-tree build") @@ -78,7 +78,7 @@ else () # In-tree build ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR} ${LIBQAT_HEADER_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) endif () diff --git a/contrib/libfiu-cmake/CMakeLists.txt b/contrib/libfiu-cmake/CMakeLists.txt index e805491edbb..eab55087c98 100644 --- a/contrib/libfiu-cmake/CMakeLists.txt +++ b/contrib/libfiu-cmake/CMakeLists.txt @@ -1,20 +1,21 @@ -if (NOT ENABLE_FIU) - message (STATUS "Not using fiu") +if (NOT ENABLE_LIBFIU) + message (STATUS "Not using libfiu") return () endif () -set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") +set(LIBFIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") -set(FIU_SOURCES - ${FIU_DIR}/libfiu/fiu.c - ${FIU_DIR}/libfiu/fiu-rc.c - ${FIU_DIR}/libfiu/backtrace.c - ${FIU_DIR}/libfiu/wtable.c +set(LIBFIU_SOURCES + ${LIBFIU_DIR}/libfiu/fiu.c + ${LIBFIU_DIR}/libfiu/fiu-rc.c + ${LIBFIU_DIR}/libfiu/backtrace.c + ${LIBFIU_DIR}/libfiu/wtable.c ) -set(FIU_HEADERS "${FIU_DIR}/libfiu") +set(LIBFIU_HEADERS "${LIBFIU_DIR}/libfiu") -add_library(_fiu ${FIU_SOURCES}) -target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE) -target_include_directories(_fiu PUBLIC ${FIU_HEADERS}) -add_library(ch_contrib::fiu ALIAS _fiu) 
+add_library(_libfiu ${LIBFIU_SOURCES})
+target_compile_definitions(_libfiu PUBLIC DUMMY_BACKTRACE)
+target_compile_definitions(_libfiu PUBLIC FIU_ENABLE)
+target_include_directories(_libfiu PUBLIC ${LIBFIU_HEADERS})
+add_library(ch_contrib::libfiu ALIAS _libfiu)
diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt
index e62612cff5a..89332ae0f7a 100644
--- a/contrib/qpl-cmake/CMakeLists.txt
+++ b/contrib/qpl-cmake/CMakeLists.txt
@@ -728,10 +728,6 @@ add_library(_qpl STATIC ${LIB_DEPS})
 target_include_directories(_qpl
     PUBLIC $ $)
-
-target_compile_definitions(_qpl
-    PUBLIC -DENABLE_QPL_COMPRESSION)
-
 target_link_libraries(_qpl PRIVATE ch_contrib::accel-config)
diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile
index d986c6a3c86..570a42d42d5 100644
--- a/docker/packager/cctools/Dockerfile
+++ b/docker/packager/cctools/Dockerfile
@@ -1,3 +1,5 @@
+# docker build -t clickhouse/cctools .
+
 # This is a hack to significantly reduce the build time of the clickhouse/binary-builder
 # It's based on the assumption that we don't care about the cctools version so much
 # It even does not depend on the clickhouse/fasttest in the `docker/images.json`
@@ -30,5 +32,29 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
     && cd ../.. \
     && rm -rf cctools-port

+#
+# GDB
+#
+# ld from binutils is 2.38, which has the following error:
+#
+#     DWARF error: invalid or unhandled FORM value: 0x23
+#
+ENV LD=ld.lld-${LLVM_VERSION}
+ARG GDB_VERSION=15.1
+RUN apt-get update \
+    && apt-get install --yes \
+        libgmp-dev \
+        libmpfr-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \
+    && tar -xvf gdb-$GDB_VERSION.tar.gz \
+    && cd gdb-$GDB_VERSION \
+    && ./configure --prefix=/opt/gdb \
+    && make -j $(nproc) \
+    && make install \
+    && rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz
+
 FROM scratch
 COPY --from=builder /cctools /cctools
+COPY --from=builder /opt/gdb /opt/gdb
diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index 4cac2ee6135..5d311c673a4 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -83,7 +83,7 @@ RUN arch=${TARGETARCH:-amd64} \

 # Give suid to gdb to grant it attach permissions
 # chmod 777 to make the container user independent
-RUN chmod u+s /usr/bin/gdb \
+RUN chmod u+s /opt/gdb/bin/gdb \
     && mkdir -p /var/lib/clickhouse \
     && chmod 777 /var/lib/clickhouse
diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile
index 469251f648c..dc4d470a262 100644
--- a/docker/test/integration/base/Dockerfile
+++ b/docker/test/integration/base/Dockerfile
@@ -11,7 +11,6 @@ RUN apt-get update \
         curl \
         default-jre \
         g++ \
-        gdb \
         iproute2 \
         krb5-user \
         libicu-dev \
@@ -73,3 +72,6 @@ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \
 ENV TZ=Etc/UTC
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
+ENV PATH="/opt/gdb/bin:${PATH}"
diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile
index d250b746e7d..d62009f1be3 100644
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@@ -30,7 +30,6 @@ RUN apt-get update \
         luajit \
         libssl-dev \
         libcurl4-openssl-dev \
-        gdb \
         default-jdk \
         software-properties-common \
         libkrb5-dev \
@@ -87,6 +86,8 @@ COPY modprobe.sh
/usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ COPY misc/ /misc/ +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" # Same options as in test/base/Dockerfile # (in case you need to override them in tests) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index c68a39f6f70..f7139275282 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get update \ curl \ dmidecode \ g++ \ - gdb \ git \ gnuplot \ imagemagick \ @@ -42,6 +41,9 @@ RUN pip3 --no-cache-dir install -r requirements.txt COPY run.sh / +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" + CMD ["bash", "/run.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 3f734bef6a8..c70cbe1fe45 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -54,8 +54,6 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless -./mc admin trace clickminio > /test_output/minio.log & -MC_ADMIN_PID=$! ./setup_hdfs_minicluster.sh @@ -176,6 +174,55 @@ done setup_logs_replication attach_gdb_to_clickhouse +# create tables for minio log webhooks +clickhouse-client --query "CREATE TABLE minio_audit_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" + +clickhouse-client --query "CREATE TABLE minio_server_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" + +# create minio log webhooks for both audit and server logs +# use async inserts to avoid creating too many parts +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 + +max_retries=100 +retry=1 +while [ $retry -le $max_retries ]; do + echo "clickminio restart attempt $retry:" + + output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status) + echo "Output of restart status: $output" + + expected_output="success +success" + if [ "$output" = "$expected_output" ]; then + echo "Restarted clickminio successfully." + break + fi + + sleep 1 + + retry=$((retry + 1)) +done + +if [ $retry -gt $max_retries ]; then + echo "Failed to restart clickminio after $max_retries attempts." +fi + +./mc admin trace clickminio > /test_output/minio.log & +MC_ADMIN_PID=$! 
+
 function fn_exists() { declare -F "$1" > /dev/null; }
@@ -339,6 +386,14 @@ do
     fi
 done

+
+# collect minio audit and server logs
+# wait for minio to flush its batch if it has any
+sleep 1
+clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
+clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
+clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"
+
 # Stop server so we can safely read data with clickhouse-local.
 # Why do we read data with clickhouse-local?
 # Because it's the simplest way to read it when server has crashed.
diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh
index d8310d072b8..c1508df7e82 100755
--- a/docker/test/stateless/setup_minio.sh
+++ b/docker/test/stateless/setup_minio.sh
@@ -99,10 +99,9 @@ upload_data() {
     # iterating over globs will cause redundant file variable to be
     # a path to a file, not a filename
     # shellcheck disable=SC2045
-    for file in $(ls "${data_path}"); do
-        echo "${file}";
-        ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
-    done
+    if [ -d "${data_path}" ]; then
+        ./mc cp --recursive "${data_path}"/ clickminio/test/
+    fi
 }

 setup_aws_credentials() {
diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile
index dc928ba7195..8b949ed95db 100644
--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@@ -44,7 +44,6 @@ RUN apt-get update \
         bash \
         bsdmainutils \
         build-essential \
-        gdb \
         git \
         gperf \
         moreutils \
@@ -58,3 +57,6 @@ RUN apt-get update \
     && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

 COPY process_functional_tests_result.py /
+
+COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
+ENV PATH="/opt/gdb/bin:${PATH}"
diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 269995a1a96..6cb36e2049b 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser

 Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.

-Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
+Each test can be one of two types: `.sql` and `.sh`. A `.sql` test is a simple SQL script that is piped to `clickhouse-client`. A `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.

 :::note
 A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC").
This is not the case, time zones in CI test runs
@@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes

 ### Adding a New Test

-To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
+To add a new test, create a `.sql` or `.sh` file in the `queries/0_stateless` directory, check it manually, and then generate the `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.

 Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.

diff --git a/docs/en/interfaces/prometheus.md b/docs/en/interfaces/prometheus.md
index bf541901b34..8e7023cc51f 100644
--- a/docs/en/interfaces/prometheus.md
+++ b/docs/en/interfaces/prometheus.md
@@ -75,7 +75,7 @@ Data are received by this protocol and written to a [TimeSeries](/en/engines/tab
             <url>/write</url>
             <handler>
                 <type>remote_write</type>
                 <database>db_name</database>
                 <table>time_series_table</table>
             </handler>
@@ -105,7 +105,7 @@ Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series)
             <url>/read</url>
             <handler>
                 <type>remote_read</type>
                 <database>db_name</database>
                 <table>time_series_table</table>
             </handler>
@@ -144,14 +144,14 @@ Multiple protocols can be specified together in one place:
             <url>/write</url>
             <handler>
                 <type>remote_write</type>
                 <table>db_name.time_series_table</table>
             </handler>
             <url>/read</url>
             <handler>
                 <type>remote_read</type>
                 <table>db_name.time_series_table</table>
             </handler>
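For orientation, a minimal sketch of the full handler configuration these docs describe; the element nesting and the rule name `my_rule_1` are illustrative assumptions, not taken verbatim from this diff:

```xml
<clickhouse>
    <prometheus>
        <port>9363</port>
        <handlers>
            <my_rule_1>
                <url>/write</url>
                <handler>
                    <type>remote_write</type>
                    <database>db_name</database>
                    <table>time_series_table</table>
                </handler>
            </my_rule_1>
        </handlers>
    </prometheus>
</clickhouse>
```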
diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
index 7a920671fc2..384938e28f6 100644
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
 Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes
 into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).

+Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
+[query_cache_tag](settings/settings.md#query-cache-tag) that acts as a label (or namespace) for query cache entries. The query cache
+considers results of the same query with different tags as different.
+
+Example of creating three different query cache entries for the same query:
+
+```sql
+SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
+```
+
 ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
 etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
 [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e432f4e038f..03ff6067a8f 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1800,6 +1800,17 @@ Possible values:

 Default value: `0`.

+## query_cache_tag {#query-cache-tag}
+
+A string which acts as a label for [query cache](../query-cache.md) entries.
+The same queries with different tags are considered different by the query cache.
+
+Possible values:
+
+- Any string
+
+Default value: `''`
+
 ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}

 The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
@@ -5627,6 +5638,12 @@ Disable all insert and mutations (alter table update / alter table delete / alte

 Default value: `false`.

+## use_hive_partitioning
+
+When enabled, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+Default value: `false`.
+
 ## allow_experimental_time_series_table {#allow-experimental-time-series-table}

 Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md
index 7e28a251e26..d58c9f754fd 100644
--- a/docs/en/operations/system-tables/kafka_consumers.md
+++ b/docs/en/operations/system-tables/kafka_consumers.md
@@ -24,6 +24,7 @@ Columns:
 - `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions
 - `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster
 - `is_currently_used`, (UInt8) - consumer is in use
+- `last_used`, (UInt64) - last time this consumer was in use, Unix time in microseconds
 - `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 to disable, default is 3000 (once in three seconds).

 Example:
diff --git a/docs/en/operations/system-tables/query_cache.md b/docs/en/operations/system-tables/query_cache.md
index a9f86f5fc2b..9c48574a329 100644
--- a/docs/en/operations/system-tables/query_cache.md
+++ b/docs/en/operations/system-tables/query_cache.md
@@ -9,6 +9,7 @@ Columns:

 - `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
 - `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
+- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
 - `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
 - `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
 - `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
@@ -26,6 +27,7 @@ Row 1:
 ──────
 query:       SELECT 1 SETTINGS use_query_cache = 1
 result_size: 128
+tag:
 stale:       0
 shared:      0
 compressed:  1
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index afd779ca8cd..1b50104da52 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -4189,3 +4189,94 @@ Result:
 │ 32 │
 └─────────────────────────────┘
 ```
+
+## getSubcolumn
+
+Takes a table expression or identifier and a constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
+
+**Syntax**
+
+```sql
+getSubcolumn(col_name, subcol_name)
+```
+
+**Arguments**
+
+- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
+- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
+
+**Returned value**
+
+- Returns the extracted sub-column.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
+SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
+```
+
+Result:
+
+```response
+   ┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
+1. │ [1,2]                           │ ['Hello','World']               │
+2. │ [3,4,5]                         │ ['This','is','subcolumn']       │
+   └─────────────────────────────────┴─────────────────────────────────┘
+```
+
+## getTypeSerializationStreams
+
+Enumerates stream paths of a data type.
+
+:::note
+This function is intended for use by developers.
+:::
+
+**Syntax**
+
+```sql
+getTypeSerializationStreams(col)
+```
+
+**Arguments**
+
+- `col` — Column or string representation of a data type from which the data type will be detected.
+
+**Returned value**
+
+- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
+
+**Examples**
+
+Query:
+
+```sql
+SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
+```
+
+Result:
+
+```response
+   ┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
+1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
+   └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Query:
+
+```sql
+SELECT getTypeSerializationStreams('Map(String, Int64)');
+```
+
+Result:
+
+```response
+   ┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
+1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
+   └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md
index 6880cef0e5c..7a1774a01b5 100644
--- a/docs/en/sql-reference/statements/alter/statistics.md
+++ b/docs/en/sql-reference/statements/alter/statistics.md
@@ -8,26 +8,28 @@ sidebar_label: STATISTICS

 The following operations are available:

-- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
+- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds a statistics description to the table metadata.

-- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
+- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies the statistics description in the table metadata.

-- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
+- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.

-- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
+- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuilt using `ALTER TABLE MATERIALIZE STATISTICS`.

-- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistics for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).

 The first two commands are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing statistics metadata via ZooKeeper.

-There is an example adding two statistics types to two columns:
+## Example
+
+Adding two statistics types to two columns:

 ```
 ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
 ```

 :::note
-Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
+Statistics are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
 :::
diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md
index f59fedeb3a2..6936c807f96 100644
--- a/docs/en/sql-reference/table-functions/azureBlobStorage.md
+++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md
@@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
 **See Also**

 - [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
+
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Using virtual columns created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index 44b1b50620a..7908a3cb934 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.

+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Using virtual columns created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Settings {#settings}

 - [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md
index 405ac477846..f96e48d914d 100644
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@@ -100,6 +100,19 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.

+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Using virtual columns created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from hdfs('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}

 - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 35e5d86034c..1bd9f38517e 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -274,6 +274,19 @@ FROM s3(
 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.

+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Using virtual columns created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}

 - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default.
diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md
index 3bb7aff53a7..b4027594e7c 100644
--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it
 - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
 - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
+## Hive-style partitioning {#hive-style-partitioning}
+
+When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow you to use partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
+
+**Example**
+
+Using virtual columns created with Hive-style partitioning:
+
+``` sql
+SET use_hive_partitioning = 1;
+SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42;
+```
+
 ## Storage Settings {#storage-settings}

 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
diff --git a/docs/zh/operations/external-authenticators/kerberos.md b/docs/zh/operations/external-authenticators/kerberos.md
index 649a0b9bd48..d1a39bbc952 100644
--- a/docs/zh/operations/external-authenticators/kerberos.md
+++ b/docs/zh/operations/external-authenticators/kerberos.md
@@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos
 Example (in `config.xml`):

 ```xml
-<yandex>
+<clickhouse>
     <kerberos />
-</yandex>
+</clickhouse>
 ```

 Principal specification:

 ```xml
-<yandex>
+<clickhouse>
     <kerberos>
         <principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
     </kerberos>
-</yandex>
+</clickhouse>
 ```

 Filtering by realm:

 ```xml
-<yandex>
+<clickhouse>
     <kerberos>
         <realm>EXAMPLE.COM</realm>
     </kerberos>
-</yandex>
+</clickhouse>
 ```

 !!! warning "Note"
@@ -74,7 +74,7 @@ The Kerberos principal name format usually follows this pattern:
 Example (in `users.xml`):

 ```
-<yandex>
+<clickhouse>
     <users>
         <my_user>
             <kerberos>
@@ -85,7 +85,7 @@ The Kerberos principal name format usually follows this pattern:
             </kerberos>
         </my_user>
     </users>
-</yandex>
+</clickhouse>
 ```

 !!! warning "Warning"
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 1d99d223ee9..25c94c56aa6 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -223,7 +223,7 @@ std::vector<String> Client::loadWarningMessages()
             size_t rows = packet.block.rows();

             for (size_t i = 0; i < rows; ++i)
-                messages.emplace_back(column[i].get<String>());
+                messages.emplace_back(column[i].safeGet<String>());
         }
         continue;
diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp
index df9da8e9613..7226bd82df7 100644
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@@ -95,7 +95,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
     client->zookeeper->set(
         client->getAbsolutePath(query->args[0].safeGet<String>()),
         query->args[1].safeGet<String>(),
-        static_cast<int32_t>(query->args[2].get<Int64>()));
+        static_cast<int32_t>(query->args[2].safeGet<Int64>()));
 }

 bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
@@ -494,7 +494,7 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
 {
     Int32 version{-1};
     if (query->args.size() == 2)
-        version = static_cast<Int32>(query->args[1].get<Int64>());
+        version = static_cast<Int32>(query->args[1].safeGet<Int64>());

     client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
 }
@@ -549,7 +549,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
     String leaving;
     String new_members;

-    auto operation = query->args[0].get<UInt8>();
+    auto operation = query->args[0].safeGet<UInt8>();
     switch (operation)
     {
         case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 6b0b8fc5b50..200beea7b63 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -143,7 +143,7 @@ void LocalServer::initialize(Poco::Util::Application & self)

     if (fs::exists(config_path))
     {
-        ConfigProcessor config_processor(config_path, false, true);
+        ConfigProcessor config_processor(config_path);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 688ae1a1143..4c3981c1d01 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1307,6 +1307,7 @@ try throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); SingleReadBufferIterator read_buffer_iterator(std::move(file)); + schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const); } else diff --git a/programs/server/config.d/transactions.xml b/programs/server/config.d/transactions.xml new file mode 120000 index 00000000000..be9de46b607 --- /dev/null +++ b/programs/server/config.d/transactions.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/transactions.xml \ No newline at end of file diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 7034e6373b1..5cc9f725b46 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -780,12 +780,12 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); has_limit = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, @@ -816,11 +816,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[i].get() < 0) || - (type == Field::Types::UInt64 && parameters[i].get() == 0)) + if ((type == Field::Types::Int64 && parameters[i].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[i].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - return parameters[i].get(); + return parameters[i].safeGet(); }; UInt64 max_elems = get_parameter(0); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 38f2fcb9fb9..36d00b1d9ec 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -83,16 +83,16 @@ public: if (version == 1) { for (size_t i = 0; i < arr_size; ++i) - set.insert(static_cast((*data_column)[offset + i].get())); + set.insert(static_cast((*data_column)[offset + i].safeGet())); } else if (!set.empty()) { typename State::Set new_set; for (size_t i = 0; i < arr_size; ++i) { - typename State::Set::LookupResult set_value = 
set.find(static_cast((*data_column)[offset + i].get())); + typename State::Set::LookupResult set_value = set.find(static_cast((*data_column)[offset + i].safeGet())); if (set_value != nullptr) - new_set.insert(static_cast((*data_column)[offset + i].get())); + new_set.insert(static_cast((*data_column)[offset + i].safeGet())); } set = std::move(new_set); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index 026b8d1956f..2c3ac7f883e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -269,12 +269,12 @@ AggregateFunctionPtr createAggregateFunctionMoving( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); - if ((type == Field::Types::Int64 && parameters[0].get() <= 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp index d41d743e17a..27043ed6aa6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp @@ -397,11 +397,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 5494ef74705..636ac80e350 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -247,7 +247,7 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name); - delimiter = parameters[0].get(); + delimiter = parameters[0].safeGet(); } if (parameters.size() == 2) { @@ -256,12 +256,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name); - if ((type == Field::Types::Int64 && parameters[1].get() <= 0) || - (type == Field::Types::UInt64 && 
parameters[1].get() == 0)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get()); + if ((type == Field::Types::Int64 && parameters[1].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[1].safeGet() == 0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].safeGet()); has_limit = true; - limit = parameters[1].get(); + limit = parameters[1].safeGet(); } if (has_limit) diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 7b4300b3568..5cbf449c946 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -323,12 +323,12 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 04eebe9f485..28e8d37b8c8 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -238,7 +238,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -255,7 +255,7 @@ public: if (params[1].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName()); - method = params[1].get(); + method = params[1].safeGet(); if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. 
" "It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName()); diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index 6d1e3c0f64b..813b13b6f7b 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -181,7 +181,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName()); - total_buckets = params[0].get(); + total_buckets = params[0].safeGet(); this->x_type = WhichDataType(arguments[0]).idx; this->y_type = WhichDataType(arguments[1]).idx; diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index f088737c340..fa90846650d 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -152,7 +152,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -169,7 +169,7 @@ public: if (params[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName()); - continuity_correction = static_cast(params[1].get()); + continuity_correction = static_cast(params[1].safeGet()); } String getName() const override diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 127dc06b642..423fd4bc569 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -117,7 +117,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName()); - relative_accuracy = relative_accuracy_field.get(); + relative_accuracy = relative_accuracy_field.safeGet(); if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy)) throw Exception( @@ -147,9 +147,9 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName()); if (accuracy_field.getType() == Field::Types::Int64) - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); else - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); if (accuracy <= 0) throw Exception( diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 666136a91b7..9a94c3dfe1a 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -300,12 +300,12 @@ public: /// Compatibility with previous versions. 
if (value.getType() == Field::Types::Decimal32) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } else if (value.getType() == Field::Types::Decimal64) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } @@ -355,7 +355,7 @@ public: /// Compatibility with previous versions. if (value.getType() == Field::Types::Decimal128) { - auto source = value.get>(); + auto source = value.safeGet>(); WhichDataType value_type(values_types[col_idx]); if (value_type.isDecimal32()) { @@ -560,7 +560,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val > x) { x = val; @@ -600,7 +600,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val < x) { x = val; diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 76dc8ab94b4..f31920f8e33 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -137,7 +137,7 @@ private: if (constant_node_value.getType() != Field::Types::Which::Tuple) return {}; - const auto & constant_tuple = constant_node_value.get(); + const auto & constant_tuple = constant_node_value.safeGet(); const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes(); size_t function_arguments_nodes_size = function_arguments_nodes.size(); diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 2b2ac95d7b9..6c4ce789993 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -89,7 +89,7 @@ public: if (!pattern || !isString(pattern->getResultType())) continue; - auto regexp = likePatternToRegexp(pattern->getValue().get()); + auto regexp = likePatternToRegexp(pattern->getValue().safeGet()); /// Case insensitive. Works with UTF-8 as well. 
if (is_ilike) regexp = "(?i)" + regexp; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index b968f43c6a6..1fc3eec6833 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -68,10 +68,10 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); if (value.getType() == Field::Types::UInt64) - return data_type_tuple.getNameByPosition(value.get()); + return data_type_tuple.getNameByPosition(value.safeGet()); return ""; } @@ -79,7 +79,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & dat String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); return ""; } diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 0175e304a2b..f3b109a10ed 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -187,7 +187,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector & nodes /// Sort nodes and parameters in ascending order of quantile level std::vector permutation(nodes.size()); iota(permutation.data(), permutation.size(), size_t(0)); - std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get() < parameters[j].get(); }); + std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].safeGet() < parameters[j].safeGet(); }); std::vector new_nodes; new_nodes.reserve(permutation.size()); diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index d966f129d08..f81327c5d55 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -134,8 +134,8 @@ public: return; std::set string_values; - string_values.insert(first_literal->getValue().get()); - string_values.insert(second_literal->getValue().get()); + string_values.insert(first_literal->getValue().safeGet()); + string_values.insert(second_literal->getValue().safeGet()); changeIfArguments(*function_if_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_if_node), context); @@ -163,7 +163,7 @@ public: if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType())) return; - auto array_to = literal_to->getValue().get(); + auto array_to = literal_to->getValue().safeGet(); if (array_to.empty()) return; @@ -178,9 +178,9 @@ public: std::set string_values; for (const auto & value : array_to) - string_values.insert(value.get()); + string_values.insert(value.safeGet()); - string_values.insert(literal_default->getValue().get()); + string_values.insert(literal_default->getValue().safeGet()); changeTransformArguments(*function_modified_transform_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_transform_node), context); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 3a8b6e75d40..02f1c93ea7f 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ 
-54,7 +54,7 @@ public: } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && - first_argument_constant_literal.get() == 1) + first_argument_constant_literal.safeGet() == 1) { function_node->getArguments().getNodes().clear(); resolveAggregateFunctionNodeByName(*function_node, "count"); diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index feb8bcc792d..0f33c302265 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -143,13 +143,13 @@ private: const auto & column_type = column_node_typed.getColumnType().get(); if (isDateOrDate32(column_type)) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column_type) || isDateTime64(column_type)) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a48e88132a6..091061ceb81 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -60,7 +60,7 @@ public: { const auto & second_const_value = second_const_node->getValue(); if (second_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond) @@ -89,7 +89,7 @@ public: { const auto & first_const_value = first_const_node->getValue(); if (first_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond)) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1524629dc81..a987ced497a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -66,7 +66,7 @@ public: resolveAggregateFunctionNodeByName(*function_node, "countIf"); - if (constant_value_literal.get() != 1) + if (constant_value_literal.safeGet() != 1) { /// Rewrite `sumIf(123, cond)` into `123 * countIf(cond)` node = getMultiplyFunction(std::move(multiplier_node), node); @@ -105,8 +105,8 @@ public: const auto & if_true_condition_constant_value_literal = if_true_condition_constant_node->getValue(); const auto & if_false_condition_constant_value_literal = if_false_condition_constant_node->getValue(); - auto if_true_condition_value = 
if_true_condition_constant_value_literal.get(); - auto if_false_condition_value = if_false_condition_constant_value_literal.get(); + auto if_true_condition_value = if_true_condition_constant_value_literal.safeGet(); + auto if_false_condition_value = if_false_condition_constant_value_literal.safeGet(); if (if_false_condition_value == 0) { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index fb41826929f..9754897d54d 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -471,7 +471,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::shared_ptr collator; if (order_by_element.getCollation()) - collator = std::make_shared(order_by_element.getCollation()->as().value.get()); + collator = std::make_shared(order_by_element.getCollation()->as().value.safeGet()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 447bf825836..a79433ac130 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1273,7 +1273,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( const auto & constant_node_value = constant_node.getValue(); if (constant_node_value.getType() == Field::Types::String) { - array_join_subcolumn_prefix = constant_node_value.get() + "."; + array_join_subcolumn_prefix = constant_node_value.safeGet() + "."; array_join_parent_column = argument_nodes.at(0).get(); } } @@ -1287,7 +1287,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( if (!second_argument || second_argument->getValue().getType() != Field::Types::String) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree()); - const auto & resolved_subcolumn_path = second_argument->getValue().get(); + const auto & resolved_subcolumn_path = second_argument->getValue().safeGet(); if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix)) return {}; @@ -1331,7 +1331,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression size_t nested_function_arguments_size = nested_function_arguments.size(); const auto & nested_keys_names_constant_node = nested_function_arguments[0]->as(); - const auto & nested_keys_names = nested_keys_names_constant_node.getValue().get(); + const auto & nested_keys_names = nested_keys_names_constant_node.getValue().safeGet(); size_t nested_keys_names_size = nested_keys_names.size(); if (nested_keys_names_size == nested_function_arguments_size - 1) @@ -1344,7 +1344,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); - const auto & nested_key_name = nested_keys_names[i - 1].get(); + const auto & nested_key_name = nested_keys_names[i - 1].safeGet(); Identifier nested_identifier = Identifier(nested_key_name); array_join_resolved_expression = wrapExpressionNodeInTupleElement(array_join_column, nested_identifier, scope.context); break; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index e04b93314c5..2e8acf2f201 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ 
b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -748,11 +748,11 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ UInt64 pos; if (constant_node->getValue().getType() == Field::Types::UInt64) { - pos = constant_node->getValue().get(); + pos = constant_node->getValue().safeGet(); } else // Int64 { - auto value = constant_node->getValue().get(); + auto value = constant_node->getValue().safeGet(); if (value > 0) pos = value; else diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index e7d1a22f45a..59a243b27f3 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -99,7 +99,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& "Invalid type in set. Expected tuple, got {}", value.getTypeName()); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); const DataTypePtr & value_type = value_types[collection_index]; const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); @@ -175,15 +175,15 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (rhs_which_type.isArray()) { const DataTypeArray * value_array_type = assert_cast(value_type.get()); - size_t value_array_size = value.get().size(); + size_t value_array_size = value.safeGet().size(); DataTypes value_types(value_array_size, value_array_type->getNestedType()); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else if (rhs_which_type.isTuple()) { const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); const DataTypes & value_types = value_tuple_type->getElements(); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 37ddd344001..e982a806b7c 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -126,7 +126,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - const auto & replicas = array_of_replicas->value.get(); + const auto & replicas = array_of_replicas->value.safeGet(); res[i].resize(replicas.size()); for (size_t j = 0; j != replicas.size(); ++j) { @@ -135,7 +135,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - res[i][j] = replica.get(); + res[i][j] = replica.safeGet(); } } } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 0b93ae6d547..8b45c816817 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -490,6 +490,8 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. 
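// Note on the get<T>() -> safeGet<T>() renames that dominate this patch: get<T>() returns the stored value without checking the Field's runtime type, while safeGet<T>() validates the type first and throws on a mismatch. A minimal sketch of the difference (an illustration only, not part of the patch; it assumes just the DB::Field API from Core/Field.h):

#include <Core/Field.h>

using namespace DB;

void safeGetSketch()
{
    Field field = String("hello");

    /// The stored type matches, so the checked accessor succeeds.
    const auto & str = field.safeGet<String>();

    /// On a mismatch, get<UInt64>() would reinterpret the storage unchecked;
    /// safeGet<UInt64>() throws an exception instead:
    /// UInt64 number = field.safeGet<UInt64>(); // throws
    (void)str;
}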
auto process_list_element = context_in_use->getProcessListElement(); + /// Update context to preserve query information in processlist (settings, current_database) + process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, @@ -853,6 +855,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); + /// Update context to preserve query information in processlist (settings, current_database) + process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 910831195a3..4dcbdcc1617 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -46,8 +46,8 @@ namespace if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); + String & zookeeper_path_arg = zookeeper_path_ast->value.safeGet(); + String & replica_name_arg = replica_name_ast->value.safeGet(); if (create.uuid != UUIDHelpers::Nil) { String table_uuid_str = toString(create.uuid); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index a974fc11d00..8e60e8d129e 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -31,7 +31,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreTableCreationMode::kCreate; @@ -54,7 +54,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreTableCreationMode::kCreate; @@ -95,7 +95,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreAccessCreationMode::kCreate; @@ -118,7 +118,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreAccessCreationMode::kCreate; diff --git a/src/Backups/SettingsFieldOptionalString.cpp b/src/Backups/SettingsFieldOptionalString.cpp index 573fd1e052c..684407a533d 100644 --- a/src/Backups/SettingsFieldOptionalString.cpp +++ b/src/Backups/SettingsFieldOptionalString.cpp @@ -19,7 +19,7 @@ SettingFieldOptionalString::SettingFieldOptionalString(const Field & field) if (field.getType() == Field::Types::String) { - value = field.get(); + value = field.safeGet(); return; } diff --git a/src/Backups/SettingsFieldOptionalUUID.cpp b/src/Backups/SettingsFieldOptionalUUID.cpp index 3f14608b206..0011f7f1073 100644 --- a/src/Backups/SettingsFieldOptionalUUID.cpp +++ b/src/Backups/SettingsFieldOptionalUUID.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str.empty()) { 
value = std::nullopt; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98dd0601a1b..43092d10dd2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -353,8 +353,8 @@ target_link_libraries(clickhouse_common_io Poco::Foundation ) -if (TARGET ch_contrib::fiu) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu) +if (TARGET ch_contrib::libfiu) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::libfiu) endif() if (TARGET ch_contrib::cpuid) @@ -556,14 +556,13 @@ target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) if (TARGET ch_contrib::qpl) dbms_target_link_libraries(PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) endif () -if (TARGET ch_contrib::accel-config) - dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) -endif () - -if (TARGET ch_contrib::qatzstd_plugin) +if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin) dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) + dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) endif () diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 0188ebc8173..affd620f83a 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -214,7 +214,7 @@ void Suggest::fillWordsFromBlock(const Block & block) Words new_words; new_words.reserve(rows); for (size_t i = 0; i < rows; ++i) - new_words.emplace_back(column[i].get()); + new_words.emplace_back(column[i].safeGet()); addWords(std::move(new_words)); } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 4bc48c62eb4..d3363d91a46 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -457,9 +457,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const Field ColumnAggregateFunction::operator[](size_t n) const { Field field = AggregateFunctionStateData(); - field.get().name = type_string; + field.safeGet().name = type_string; { - WriteBufferFromString buffer(field.get().data); + WriteBufferFromString buffer(field.safeGet().data); func->serialize(data[n], buffer, version); } return field; @@ -467,12 +467,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const void ColumnAggregateFunction::get(size_t n, Field & res) const { - res = AggregateFunctionStateData(); - res.get().name = type_string; - { - WriteBufferFromString buffer(res.get().data); - func->serialize(data[n], buffer, version); - } + res = operator[](n); } StringRef ColumnAggregateFunction::getDataAt(size_t n) const @@ -552,7 +547,7 @@ void ColumnAggregateFunction::insert(const Field & x) "Inserting field of type {} into ColumnAggregateFunction. 
Expected {}", x.getTypeName(), Field::Types::AggregateFunctionState); - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot insert filed with type {} into column with type {}", field_name, type_string); @@ -560,7 +555,7 @@ void ColumnAggregateFunction::insert(const Field & x) ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); } @@ -569,14 +564,14 @@ bool ColumnAggregateFunction::tryInsert(const DB::Field & x) if (x.getType() != Field::Types::AggregateFunctionState) return false; - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) return false; ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); return true; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 19cce678cc7..26841879474 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const size, max_array_size_as_field); res = Array(); - Array & res_arr = res.get(); + Array & res_arr = res.safeGet(); res_arr.reserve(size); for (size_t i = 0; i < size; ++i) @@ -309,7 +309,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const void ColumnArray::insert(const Field & x) { - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) getData().insert(array[i]); @@ -321,7 +321,7 @@ bool ColumnArray::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Array) return false; - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) { @@ -452,6 +452,22 @@ void ColumnArray::reserve(size_t n) getData().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1. 
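// The prepareForSquashing() overrides added below (ColumnArray here, then ColumnDynamic, ColumnMap, ColumnNullable, ColumnString, ColumnTuple and ColumnVariant, plus a generic IColumn fallback) let a squashing writer reserve memory once for all source chunks instead of growing buffers insert by insert. A sketch of the intended call pattern (hypothetical caller, not from this patch):

#include <Columns/IColumn.h>

using namespace DB;

MutableColumnPtr squashColumns(MutableColumnPtr target, const Columns & sources)
{
    /// A single reservation that accounts for every source column...
    target->prepareForSquashing(sources);

    /// ...so the appends below should not trigger incremental reallocations.
    for (const auto & source : sources)
        target->insertRangeFrom(*source, 0, source->size());

    return target;
}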
} +void ColumnArray::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns source_data_columns; + source_data_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_array_column = assert_cast(*source_column); + new_size += source_array_column.size(); + source_data_columns.push_back(source_array_column.getDataPtr()); + } + + getOffsets().reserve_exact(new_size); + data->prepareForSquashing(source_data_columns); +} + void ColumnArray::shrinkToFit() { getOffsets().shrink_to_fit(); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 63affb86d9d..d6f71b72940 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -118,6 +118,7 @@ public: void updatePermutationWithCollation(const Collator & collator, PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 59bfbd2159c..07120f5f035 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -74,7 +74,7 @@ public: void insertData(const char * src, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } - void insert(const Field & x) override { data.push_back(x.get()); } + void insert(const Field & x) override { data.push_back(x.safeGet()); } bool tryInsert(const Field & x) override; #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a92d54dd675..74b7ef69d8d 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -643,6 +643,116 @@ ColumnPtr ColumnDynamic::compress() const }); } +void ColumnDynamic::prepareForSquashing(const Columns & source_columns) +{ + if (source_columns.empty()) + return; + + /// Internal variants of source dynamic columns may differ. + /// We want to preallocate memory for all variants we will have after squashing. + /// It may happen that the total number of variants in source columns will + /// exceed the limit; in that case we will choose the most frequent variants. + + /// First, preallocate memory for variant discriminators and offsets. + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + auto & variant_col = getVariantColumn(); + variant_col.getLocalDiscriminators().reserve_exact(new_size); + variant_col.getOffsets().reserve_exact(new_size); + + /// Second, collect all variants and their total sizes. 
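// A worked example of the selection below, with illustrative numbers: squashing
// sources whose variants total {Int64: 1000 rows, Float64: 50 rows,
// Array(Int64): 2 rows} under max_dynamic_types = 3 trips the limit check
// (three variants, none of them String), so the result keeps Int64 and Float64
// by total size plus the mandatory String variant; values of the dropped
// Array(Int64) variant would then take the column's usual overflow path at the
// variant limit.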
+ std::unordered_map total_variant_sizes; + DataTypes all_variants; + + auto add_variants = [&](const ColumnDynamic & source_dynamic) + { + const auto & source_variant_column = source_dynamic.getVariantColumn(); + const auto & source_variant_info = source_dynamic.getVariantInfo(); + const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); + + for (size_t i = 0; i != source_variants.size(); ++i) + { + const auto & variant_name = source_variant_info.variant_names[i]; + auto it = total_variant_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_variant_sizes.end()) + { + all_variants.push_back(source_variants[i]); + it = total_variant_sizes.emplace(variant_name, 0).first; + } + + it->second += source_variant_column.getVariantByGlobalDiscriminator(i).size(); + } + }; + + for (const auto & source_column : source_columns) + add_variants(assert_cast(*source_column)); + + /// Add variants from this dynamic column. + add_variants(*this); + + DataTypePtr result_variant_type; + /// Check if the number of all variants exceeds the limit. + if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String"))) + { + /// We want to keep the most frequent variants in the resulting dynamic column. + DataTypes result_variants; + result_variants.reserve(max_dynamic_types); + /// Add variants from current variant column as we will not rewrite it. + for (const auto & variant : assert_cast(*variant_info.variant_type).getVariants()) + result_variants.push_back(variant); + /// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit. + if (!variant_info.variant_name_to_discriminator.contains("String")) + result_variants.push_back(std::make_shared()); + + /// Create list of remaining variants with their sizes and sort it. + std::vector> variants_with_sizes; + variants_with_sizes.reserve(all_variants.size() - variant_info.variant_names.size()); + for (const auto & variant : all_variants) + { + /// Add the variant to the list only if we didn't add it yet. + auto variant_name = variant->getName(); + if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) + variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant); + } + + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + /// Add the most frequent variants until we reach max_dynamic_types. + size_t num_new_variants = max_dynamic_types - result_variants.size(); + for (size_t i = 0; i != num_new_variants; ++i) + result_variants.push_back(variants_with_sizes[i].second); + + result_variant_type = std::make_shared(result_variants); + } + else + { + result_variant_type = std::make_shared(all_variants); + } + + if (!result_variant_type->equals(*variant_info.variant_type)) + updateVariantInfoAndExpandVariantColumn(result_variant_type); + + /// Now the current dynamic column has all resulting variants and we can call + /// prepareForSquashing on them to preallocate the memory. 
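// For example, if the resulting variants are [Int64, String] and a given source
// column contains only Int64, the loop below collects just its Int64
// sub-column; a source that lacks a variant simply contributes nothing to that
// variant's reservation.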
+ for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_dynamic_column = assert_cast(*source_column); + const auto & source_variant_info = source_dynamic_column.getVariantInfo(); + /// Try to find this variant in the current source column. + auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); + if (it != source_variant_info.variant_name_to_discriminator.end()) + source_variant_columns.push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); + } + + variant_col.getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } +} + void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { if (!empty()) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index e92cabd3db9..cb3a896d2cb 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -254,6 +254,8 @@ public: variant_column->reserve(n); } + void prepareForSquashing(const Columns & source_columns) override; + void ensureOwnership() override { variant_column->ensureOwnership(); diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 0bb3f7edb14..04e894ee5ab 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const void ColumnFixedString::insert(const Field & x) { - const String & s = x.get(); + const String & s = x.safeGet(); insertData(s.data(), s.size()); } @@ -67,7 +67,7 @@ bool ColumnFixedString::tryInsert(const Field & x) { if (x.getType() != Field::Types::Which::String) return false; - const String & s = x.get(); + const String & s = x.safeGet(); if (s.size() > n) return false; insertData(s.data(), s.size()); diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 1025b4e77b9..651b073fef7 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -72,7 +72,7 @@ void ColumnMap::get(size_t n, Field & res) const size_t size = offsets[n] - offsets[n - 1]; res = Map(); - auto & map = res.get(); + auto & map = res.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) @@ -96,7 +96,7 @@ void ColumnMap::insertData(const char *, size_t) void ColumnMap::insert(const Field & x) { - const auto & map = x.get(); + const auto & map = x.safeGet(); nested->insert(Array(map.begin(), map.end())); } @@ -105,7 +105,7 @@ bool ColumnMap::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Map) return false; - const auto & map = x.get(); + const auto & map = x.safeGet(); return nested->tryInsert(Array(map.begin(), map.end())); } @@ -249,6 +249,15 @@ void ColumnMap::reserve(size_t n) nested->reserve(n); } +void ColumnMap::prepareForSquashing(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->prepareForSquashing(nested_source_columns); +} + void ColumnMap::shrinkToFit() { nested->shrinkToFit(); @@ -288,8 +297,8 @@ void ColumnMap::getExtremes(Field & min, Field & max) const /// Convert result Array fields to Map fields because client expect min and max field to have type Map - Array nested_min_value 
= nested_min.get(); - Array nested_max_value = nested_max.get(); + Array nested_min_value = nested_min.safeGet(); + Array nested_max_value = nested_max.safeGet(); Map map_min_value(nested_min_value.begin(), nested_min_value.end()); Map map_max_value(nested_max_value.begin(), nested_max_value.end()); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 3eaaa0ad562..191476839f1 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -94,6 +94,7 @@ public: void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6529f0b78db..2a25cac6461 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -706,6 +706,22 @@ void ColumnNullable::reserve(size_t n) getNullMapData().reserve(n); } +void ColumnNullable::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_nullable_column = assert_cast(*source_column); + new_size += source_nullable_column.size(); + nested_source_columns.push_back(source_nullable_column.getNestedColumnPtr()); + } + + nested_column->prepareForSquashing(nested_source_columns); + getNullMapData().reserve(new_size); +} + void ColumnNullable::shrinkToFit() { getNestedColumn().shrinkToFit(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index fe9f5b6dcc2..2c32e0fe5a0 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -125,6 +125,7 @@ public: size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index a6431007cb6..eb99bb4081b 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -698,7 +698,7 @@ void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback ca void ColumnObject::insert(const Field & field) { - const auto & object = field.get(); + const auto & object = field.safeGet(); HashSet inserted_paths; size_t old_size = size(); @@ -754,7 +754,7 @@ void ColumnObject::get(size_t n, Field & res) const { assert(n < size()); res = Object(); - auto & object = res.get(); + auto & object = res.safeGet(); for (const auto & entry : subcolumns) { diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 7cfa2571f5a..9ed2c7e3d4d 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -557,6 +557,21 @@ void ColumnString::reserve(size_t n) offsets.reserve_exact(n); } +void ColumnString::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + size_t new_chars_size = chars.size(); + for (const auto & 
source_column : source_columns) + { + const auto & source_string_column = assert_cast(*source_column); + new_size += source_string_column.size(); + new_chars_size += source_string_column.chars.size(); + } + + offsets.reserve_exact(new_size); + chars.reserve_exact(new_chars_size); +} + void ColumnString::shrinkToFit() { chars.shrink_to_fit(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c1012e1e55e..5bbb3ad5151 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -123,7 +123,7 @@ public: void insert(const Field & x) override { - const String & s = x.get(); + const String & s = x.safeGet(); const size_t old_size = chars.size(); const size_t size_to_append = s.size() + 1; const size_t new_size = old_size + size_to_append; @@ -283,6 +283,7 @@ public: ColumnPtr compress() const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void getExtremes(Field & min, Field & max) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4fc3f88a87c..1539d7c8c2e 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -141,7 +141,7 @@ void ColumnTuple::get(size_t n, Field & res) const const size_t tuple_size = columns.size(); res = Tuple(); - Tuple & res_tuple = res.get(); + Tuple & res_tuple = res.safeGet(); res_tuple.reserve(tuple_size); for (size_t i = 0; i < tuple_size; ++i) @@ -169,7 +169,7 @@ void ColumnTuple::insertData(const char *, size_t) void ColumnTuple::insert(const Field & x) { - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -185,7 +185,7 @@ bool ColumnTuple::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Tuple) return false; - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -595,6 +595,19 @@ void ColumnTuple::reserve(size_t n) getColumn(i).reserve(n); } +void ColumnTuple::prepareForSquashing(const Columns & source_columns) +{ + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) + { + Columns nested_columns; + nested_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_columns.push_back(assert_cast(*source_column).getColumnPtr(i)); + getColumn(i).prepareForSquashing(nested_columns); + } +} + void ColumnTuple::shrinkToFit() { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 16b47a993f6..ef396d6a130 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -110,6 +110,7 @@ public: void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index de7efb41d19..68e19861c38 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1247,8 +1247,25 @@ void ColumnVariant::updatePermutation(IColumn::PermutationSortDirection directio void 
ColumnVariant::reserve(size_t n) { - local_discriminators->reserve(n); - offsets->reserve(n); + getLocalDiscriminators().reserve_exact(n); + getOffsets().reserve_exact(n); +} + +void ColumnVariant::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + + for (size_t i = 0; i != variants.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + source_variant_columns.push_back(assert_cast(*source_column).getVariantPtrByGlobalDiscriminator(i)); + getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } } void ColumnVariant::ensureOwnership() diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 34c24b5428d..737eb27abfe 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -237,6 +237,7 @@ public: size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void ensureOwnership() override; size_t byteSize() const override; size_t byteSizeAt(size_t n) const override; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 2fe5b635bd2..a5e1ee4b462 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -85,7 +85,7 @@ public: void insertMany(const Field & field, size_t length) override { - data.resize_fill(data.size() + length, static_cast(field.get())); + data.resize_fill(data.size() + length, static_cast(field.safeGet())); } void insertData(const char * pos, size_t) override @@ -235,7 +235,7 @@ public: void insert(const Field & x) override { - data.push_back(static_cast(x.get())); + data.push_back(static_cast(x.safeGet())); } bool tryInsert(const DB::Field & x) override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index f9c1a3e7034..edcb9f0bc30 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -475,6 +475,15 @@ public: /// It affects performance only (not correctness). virtual void reserve(size_t /*n*/) {} + /// Reserve memory before squashing all specified source columns into this column. + virtual void prepareForSquashing(const std::vector & source_columns) + { + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + } + /// Requests the removal of unused capacity. /// It is a non-binding request to reduce the capacity of the underlying container to its size. 
virtual void shrinkToFit() {} diff --git a/src/Columns/tests/gtest_column_variant.cpp b/src/Columns/tests/gtest_column_variant.cpp index 25f276b9600..5e481b88409 100644 --- a/src/Columns/tests/gtest_column_variant.cpp +++ b/src/Columns/tests/gtest_column_variant.cpp @@ -108,10 +108,10 @@ void checkColumnVariant1(ColumnVariant * column) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4)); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); - ASSERT_EQ((*column)[3].get(), "World"); + ASSERT_EQ((*column)[3].safeGet(), "World"); ASSERT_TRUE((*column)[4].isNull()); } @@ -209,9 +209,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); } TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder) @@ -222,9 +222,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrde ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); ASSERT_EQ(column->localDiscriminatorAt(0), 2); ASSERT_EQ(column->localDiscriminatorAt(1), 2); ASSERT_EQ(column->localDiscriminatorAt(2), 2); @@ -331,9 +331,9 @@ TEST(ColumnVariant, CloneResizedGeneral1) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } TEST(ColumnVariant, CloneResizedGeneral2) @@ -367,7 +367,7 @@ TEST(ColumnVariant, CloneResizedGeneral2) ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR); const auto & offsets = resized_column_variant->getOffsets(); ASSERT_EQ(offsets[0], 0); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); } TEST(ColumnVariant, CloneResizedGeneral3) @@ -405,10 +405,10 @@ TEST(ColumnVariant, CloneResizedGeneral3) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[2], 1); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[2].get(), "World"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[2].safeGet(), "World"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } MutableColumnPtr createDiscriminators2() @@ -465,7 +465,7 @@ TEST(ColumnVariant, InsertFrom) auto column_from = createVariantColumn2(change_order); column_to->insertFrom(*column_from, 3); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); - 
ASSERT_EQ((*column_to)[5].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); } } @@ -478,8 +478,8 @@ TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls) column_to->insertRangeFrom(*column_from, 2, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 0); - ASSERT_EQ((*column_to)[7].get(), 2); - ASSERT_EQ((*column_to)[8].get(), 3); + ASSERT_EQ((*column_to)[7].safeGet(), 2); + ASSERT_EQ((*column_to)[8].safeGet(), 3); } } @@ -494,9 +494,9 @@ TEST(ColumnVariant, InsertRangeFromGeneral) ASSERT_EQ(column_to->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 1); - ASSERT_EQ((*column_to)[5].get(), "Hello"); - ASSERT_EQ((*column_to)[7].get(), 43); - ASSERT_EQ((*column_to)[8].get(), "World"); + ASSERT_EQ((*column_to)[5].safeGet(), "Hello"); + ASSERT_EQ((*column_to)[7].safeGet(), 43); + ASSERT_EQ((*column_to)[8].safeGet(), "World"); } } @@ -509,8 +509,8 @@ TEST(ColumnVariant, InsertManyFrom) column_to->insertManyFrom(*column_from, 3, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(6), 0); - ASSERT_EQ((*column_to)[5].get(), 43); - ASSERT_EQ((*column_to)[6].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); + ASSERT_EQ((*column_to)[6].safeGet(), 43); } } @@ -520,8 +520,8 @@ TEST(ColumnVariant, PopBackOneColumnNoNulls) column->popBack(3); ASSERT_EQ(column->size(), 2); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); } TEST(ColumnVariant, PopBackGeneral) @@ -531,8 +531,8 @@ TEST(ColumnVariant, PopBackGeneral) ASSERT_EQ(column->size(), 3); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 1); ASSERT_EQ(column->getVariantByLocalDiscriminator(1).size(), 1); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); } @@ -545,8 +545,8 @@ TEST(ColumnVariant, FilterOneColumnNoNulls) filter.push_back(1); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 2); - ASSERT_EQ((*filtered_column)[0].get(), 0); - ASSERT_EQ((*filtered_column)[1].get(), 2); + ASSERT_EQ((*filtered_column)[0].safeGet(), 0); + ASSERT_EQ((*filtered_column)[1].safeGet(), 2); } TEST(ColumnVariant, FilterGeneral) @@ -562,7 +562,7 @@ TEST(ColumnVariant, FilterGeneral) filter.push_back(0); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 3); - ASSERT_EQ((*filtered_column)[0].get(), "Hello"); + ASSERT_EQ((*filtered_column)[0].safeGet(), "Hello"); ASSERT_TRUE((*filtered_column)[1].isNull()); ASSERT_TRUE((*filtered_column)[2].isNull()); } @@ -577,9 +577,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) permutation.push_back(0); auto permuted_column = column->permute(permutation, 3); ASSERT_EQ(permuted_column->size(), 3); - ASSERT_EQ((*permuted_column)[0].get(), 1); - ASSERT_EQ((*permuted_column)[1].get(), 3); - ASSERT_EQ((*permuted_column)[2].get(), 2); + ASSERT_EQ((*permuted_column)[0].safeGet(), 1); + ASSERT_EQ((*permuted_column)[1].safeGet(), 3); + ASSERT_EQ((*permuted_column)[2].safeGet(), 2); auto index = ColumnUInt64::create(); index->getData().push_back(1); @@ -588,9 +588,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) 
index->getData().push_back(0); auto indexed_column = column->index(*index, 3); ASSERT_EQ(indexed_column->size(), 3); - ASSERT_EQ((*indexed_column)[0].get(), 1); - ASSERT_EQ((*indexed_column)[1].get(), 3); - ASSERT_EQ((*indexed_column)[2].get(), 2); + ASSERT_EQ((*indexed_column)[0].safeGet(), 1); + ASSERT_EQ((*indexed_column)[1].safeGet(), 3); + ASSERT_EQ((*indexed_column)[2].safeGet(), 2); } TEST(ColumnVariant, PermuteGeneral) @@ -603,9 +603,9 @@ TEST(ColumnVariant, PermuteGeneral) permutation.push_back(5); auto permuted_column = column->permute(permutation, 4); ASSERT_EQ(permuted_column->size(), 4); - ASSERT_EQ((*permuted_column)[0].get(), 43); - ASSERT_EQ((*permuted_column)[1].get(), "World"); - ASSERT_EQ((*permuted_column)[2].get(), "Hello"); + ASSERT_EQ((*permuted_column)[0].safeGet(), 43); + ASSERT_EQ((*permuted_column)[1].safeGet(), "World"); + ASSERT_EQ((*permuted_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*permuted_column)[3].isNull()); } @@ -618,12 +618,12 @@ TEST(ColumnVariant, ReplicateOneColumnNoNull) offsets.push_back(6); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 6); - ASSERT_EQ((*replicated_column)[0].get(), 1); - ASSERT_EQ((*replicated_column)[1].get(), 1); - ASSERT_EQ((*replicated_column)[2].get(), 1); - ASSERT_EQ((*replicated_column)[3].get(), 2); - ASSERT_EQ((*replicated_column)[4].get(), 2); - ASSERT_EQ((*replicated_column)[5].get(), 2); + ASSERT_EQ((*replicated_column)[0].safeGet(), 1); + ASSERT_EQ((*replicated_column)[1].safeGet(), 1); + ASSERT_EQ((*replicated_column)[2].safeGet(), 1); + ASSERT_EQ((*replicated_column)[3].safeGet(), 2); + ASSERT_EQ((*replicated_column)[4].safeGet(), 2); + ASSERT_EQ((*replicated_column)[5].safeGet(), 2); } TEST(ColumnVariant, ReplicateGeneral) @@ -637,9 +637,9 @@ TEST(ColumnVariant, ReplicateGeneral) offsets.push_back(7); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 7); - ASSERT_EQ((*replicated_column)[0].get(), 42); - ASSERT_EQ((*replicated_column)[1].get(), "Hello"); - ASSERT_EQ((*replicated_column)[2].get(), "Hello"); + ASSERT_EQ((*replicated_column)[0].safeGet(), 42); + ASSERT_EQ((*replicated_column)[1].safeGet(), "Hello"); + ASSERT_EQ((*replicated_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*replicated_column)[3].isNull()); ASSERT_TRUE((*replicated_column)[4].isNull()); ASSERT_TRUE((*replicated_column)[5].isNull()); @@ -657,13 +657,13 @@ TEST(ColumnVariant, ScatterOneColumnNoNulls) selector.push_back(1); auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 2); - ASSERT_EQ((*columns[0])[0].get(), 0); - ASSERT_EQ((*columns[0])[1].get(), 3); + ASSERT_EQ((*columns[0])[0].safeGet(), 0); + ASSERT_EQ((*columns[0])[1].safeGet(), 3); ASSERT_EQ(columns[1]->size(), 2); - ASSERT_EQ((*columns[1])[0].get(), 1); - ASSERT_EQ((*columns[1])[1].get(), 4); + ASSERT_EQ((*columns[1])[0].safeGet(), 1); + ASSERT_EQ((*columns[1])[1].safeGet(), 4); ASSERT_EQ(columns[2]->size(), 1); - ASSERT_EQ((*columns[2])[0].get(), 2); + ASSERT_EQ((*columns[2])[0].safeGet(), 2); } TEST(ColumnVariant, ScatterGeneral) @@ -680,12 +680,12 @@ TEST(ColumnVariant, ScatterGeneral) auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 3); - ASSERT_EQ((*columns[0])[0].get(), 42); - ASSERT_EQ((*columns[0])[1].get(), "Hello"); - ASSERT_EQ((*columns[0])[2].get(), 43); + ASSERT_EQ((*columns[0])[0].safeGet(), 42); + ASSERT_EQ((*columns[0])[1].safeGet(), "Hello"); + ASSERT_EQ((*columns[0])[2].safeGet(), 43); ASSERT_EQ(columns[1]->size(), 2); - 
ASSERT_EQ((*columns[1])[0].get(), "World"); - ASSERT_EQ((*columns[1])[1].get(), 44); + ASSERT_EQ((*columns[1])[0].safeGet(), "World"); + ASSERT_EQ((*columns[1])[1].safeGet(), 44); ASSERT_EQ(columns[2]->size(), 2); ASSERT_TRUE((*columns[2])[0].isNull()); ASSERT_TRUE((*columns[2])[1].isNull()); diff --git a/src/Columns/tests/gtest_low_cardinality.cpp b/src/Columns/tests/gtest_low_cardinality.cpp index 5e01279b7df..ce16d2cadb1 100644 --- a/src/Columns/tests/gtest_low_cardinality.cpp +++ b/src/Columns/tests/gtest_low_cardinality.cpp @@ -20,13 +20,13 @@ void testLowCardinalityNumberInsert(const DataTypePtr & data_type) Field value; column->get(0, value); - ASSERT_EQ(value.get(), 15); + ASSERT_EQ(value.safeGet(), 15); column->get(1, value); - ASSERT_EQ(value.get(), 20); + ASSERT_EQ(value.safeGet(), 20); column->get(2, value); - ASSERT_EQ(value.get(), 25); + ASSERT_EQ(value.safeGet(), 25); } TEST(ColumnLowCardinality, Insert) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 9f80ff727b4..bcbcc36c67a 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -168,7 +168,7 @@ private: records.emplace(it, type_idx, item); } - Records::const_iterator getImpl(std::type_index type_idx) const + typename Records::const_iterator getImpl(std::type_index type_idx) const { auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 0b1ec552d43..b2fcbc77c56 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -7,6 +7,8 @@ #include #include +#include "config.h" + namespace DB { @@ -15,7 +17,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; -#if FIU_ENABLE +#if USE_LIBFIU static struct InitFiu { InitFiu() @@ -135,7 +137,7 @@ void FailPointInjection::pauseFailPoint(const String & fail_point_name) void FailPointInjection::enableFailPoint(const String & fail_point_name) { -#if FIU_ENABLE +#if USE_LIBFIU #define SUB_M(NAME, flags, pause) \ if (fail_point_name == FailPoints::NAME) \ { \ diff --git a/src/Common/FailPoint.h b/src/Common/FailPoint.h index b3e1214d597..1af13d08553 100644 --- a/src/Common/FailPoint.h +++ b/src/Common/FailPoint.h @@ -1,17 +1,16 @@ #pragma once -#include "config.h" #include #include #include +#include "config.h" + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wreserved-macro-identifier" - -#include -#include - +# include +# include #pragma clang diagnostic pop #include diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp index 6c1a8496fe6..23263c988c3 100644 --- a/src/Common/FieldBinaryEncoding.cpp +++ b/src/Common/FieldBinaryEncoding.cpp @@ -208,7 +208,7 @@ void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) con writeVarUInt(size, buf); for (size_t i = 0; i < size; ++i) { - const Tuple & key_and_value = x[i].get(); + const Tuple & key_and_value = x[i].safeGet(); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); } diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index b825f188586..af9503ac046 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -19,7 +19,7 @@ bool FieldVisitorSum::operator() (UInt64 & x) const return x != 0; } -bool FieldVisitorSum::operator() (Float64 
& x) const { x += rhs.get(); return x != 0; } +bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.safeGet(); return x != 0; } bool FieldVisitorSum::operator() (Null &) const { diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index cbb4c4a1de3..d28676b5093 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -37,7 +37,7 @@ public: template bool operator() (DecimalField & x) const { - x += rhs.get>(); + x += rhs.safeGet>(); return x.getValue() != T(0); } diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index c4cb4266418..2148bac20d1 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -172,7 +172,7 @@ String FieldVisitorToString::operator() (const Object & x) const String convertFieldToString(const Field & field) { if (field.getType() == Field::Types::Which::String) - return field.get(); + return field.safeGet(); return applyVisitor(FieldVisitorToString(), field); } diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index a26797a687a..92621db5558 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -297,7 +297,7 @@ public: } /// Only inserts the value if key isn't already present - void ALWAYS_INLINE insertIfNotPresent(const Key & x, const Cell::Mapped & value) + void ALWAYS_INLINE insertIfNotPresent(const Key & x, const typename Cell::Mapped & value) { LookupResult it; bool inserted; diff --git a/src/Common/QueryFuzzer.cpp b/src/Common/QueryFuzzer.cpp index 161c38f20e0..0b2f6c09b45 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Common/QueryFuzzer.cpp @@ -132,7 +132,7 @@ Field QueryFuzzer::fuzzField(Field field) if (type == Field::Types::String) { - auto & str = field.get(); + auto & str = field.safeGet(); UInt64 action = fuzz_rand() % 10; switch (action) { @@ -158,7 +158,7 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Array) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { @@ -193,7 +193,7 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Tuple) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { @@ -922,17 +922,17 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) auto type = l->value.getType(); if (type == Field::Types::Which::String && fuzz_rand() % 7 == 0) { - String value = l->value.get(); + String value = l->value.safeGet(); child = makeASTFunction( "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size()))); } else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? 
"toInt128" : "toInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0) { @@ -940,22 +940,22 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) if (decimal == 0) child = makeASTFunction( "toDecimal32", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 9))); else if (decimal == 1) child = makeASTFunction( "toDecimal64", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 18))); else if (decimal == 2) child = makeASTFunction( "toDecimal128", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 38))); else child = makeASTFunction( "toDecimal256", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 76))); } diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 7d2c15714e2..127c8862a35 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -27,12 +27,14 @@ #include #include + namespace DB { namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; + extern const int ABORTED; } ISystemLog::~ISystemLog() = default; @@ -86,32 +88,18 @@ void SystemLogQueue::push(LogElement&& element) // by one, under exclusive lock, so we will see each message count. // It is enough to only wake the flushing thread once, after the message // count increases past half available size. - const uint64_t queue_end = queue_front_index + queue.size(); - requested_flush_up_to = std::max(requested_flush_up_to, queue_end); - flush_event.notify_all(); + const auto last_log_index = queue_front_index + queue.size(); + notifyFlushUnlocked(last_log_index, /* should_prepare_tables_anyway */ false); } if (queue.size() >= settings.max_size_rows) { + chassert(queue.size() == settings.max_size_rows); + // Ignore all further entries until the queue is flushed. - // Log a message about that. Don't spam it -- this might be especially - // problematic in case of trace log. Remember what the front index of the - // queue was when we last logged the message. If it changed, it means the - // queue was flushed, and we can log again. - if (queue_front_index != logged_queue_full_at_index) - { - logged_queue_full_at_index = queue_front_index; - - // TextLog sets its logger level to 0, so this log is a noop and - // there is no recursive logging. - lock.unlock(); - LOG_ERROR(log, "Queue is full for system log '{}' at {}. 
max_size_rows {}", - demangle(typeid(*this).name()), - queue_front_index, - settings.max_size_rows); - } - + // A log message about how many entries were lost will be added to the next batch + ++ignored_logs; return; } @@ -127,20 +115,50 @@ template void SystemLogQueue::handleCrash() { if (settings.notify_flush_on_crash) - notifyFlush(/* force */ true); + { + notifyFlush(getLastLogIndex(), /* should_prepare_tables_anyway */ true); + } } template -void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) +void SystemLogQueue::notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway) { + if (should_prepare_tables_anyway) + requested_prepare_tables = std::max(requested_prepare_tables, expected_flushed_index); + + requested_flush_index = std::max(requested_flush_index, expected_flushed_index); + + flush_event.notify_all(); +} + +template +void SystemLogQueue::notifyFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + std::lock_guard lock(mutex); + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); +} + +template +void SystemLogQueue::waitFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + LOG_DEBUG(log, "Requested flush up to offset {}", expected_flushed_index); + + // Use an arbitrary timeout to avoid endless waiting. 60s proved to be // too fast for our parallel functional tests, probably because they // heavily load the disk. const int timeout_seconds = 180; + std::unique_lock lock(mutex); + + // There is no obligation to call notifyFlush before waitFlush, so we have to make sure that flush_event has been triggered before we wait for the result + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); + + auto result = confirm_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] { - return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables; + if (should_prepare_tables_anyway) + return (flushed_index >= expected_flushed_index && prepared_tables >= requested_prepare_tables) || is_shutdown; + else + return (flushed_index >= expected_flushed_index) || is_shutdown; }); if (!result) @@ -148,67 +166,63 @@ void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.", toString(timeout_seconds), demangle(typeid(*this).name())); } -} - -template -uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) -{ - uint64_t this_thread_requested_offset; + if (is_shutdown) { - std::lock_guard lock(mutex); - if (is_shutdown) - return uint64_t(-1); - - this_thread_requested_offset = queue_front_index + queue.size(); - - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - is_force_prepare_tables |= should_prepare_tables_anyway; - requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); - - flush_event.notify_all(); + throw Exception(ErrorCodes::ABORTED, "Shutdown has been called while flushing system log '{}'. 
Aborting.", + demangle(typeid(*this).name())); } - - LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); - return this_thread_requested_offset; } template -void SystemLogQueue::confirm(uint64_t to_flush_end) +SystemLogQueue::Index SystemLogQueue::getLastLogIndex() { std::lock_guard lock(mutex); - flushed_up_to = to_flush_end; - is_force_prepare_tables = false; - flush_event.notify_all(); + return queue_front_index + queue.size(); } template -typename SystemLogQueue::Index SystemLogQueue::pop(std::vector & output, - bool & should_prepare_tables_anyway, - bool & exit_this_thread) +void SystemLogQueue::confirm(SystemLogQueue::Index last_flashed_index) { - /// Call dtors and deallocate strings without holding the global lock - output.resize(0); + std::lock_guard lock(mutex); + prepared_tables = std::max(prepared_tables, last_flashed_index); + flushed_index = std::max(flushed_index, last_flashed_index); + confirm_event.notify_all(); +} - std::unique_lock lock(mutex); - flush_event.wait_for(lock, - std::chrono::milliseconds(settings.flush_interval_milliseconds), - [&] () +template +typename SystemLogQueue::PopResult SystemLogQueue::pop() +{ + PopResult result; + size_t prev_ignored_logs = 0; + + { + std::unique_lock lock(mutex); + + flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), [&] () { - return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; - } - ); + return requested_flush_index > flushed_index || requested_prepare_tables > prepared_tables || is_shutdown; + }); - queue_front_index += queue.size(); - // Swap with existing array from previous flush, to save memory - // allocations. - queue.swap(output); + if (is_shutdown) + return PopResult{.is_shutdown = true}; - should_prepare_tables_anyway = is_force_prepare_tables; + queue_front_index += queue.size(); + prev_ignored_logs = ignored_logs; + ignored_logs = 0; - exit_this_thread = is_shutdown; - return queue_front_index; + result.last_log_index = queue_front_index; + result.logs.swap(queue); + result.create_table_force = requested_prepare_tables > prepared_tables; + } + + if (prev_ignored_logs) + LOG_ERROR(log, "Queue had been full at {}, accepted {} logs, ignored {} logs.", + result.last_log_index - result.logs.size(), + result.logs.size(), + prev_ignored_logs); + + return result; } template @@ -229,13 +243,21 @@ SystemLogBase::SystemLogBase( } template -void SystemLogBase::flush(bool force) +SystemLogBase::Index SystemLogBase::getLastLogIndex() { - uint64_t this_thread_requested_offset = queue->notifyFlush(force); - if (this_thread_requested_offset == uint64_t(-1)) - return; + return queue->getLastLogIndex(); +} - queue->waitFlush(this_thread_requested_offset); +template +void SystemLogBase::notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + queue->notifyFlush(expected_flushed_index, should_prepare_tables_anyway); +} + +template +void SystemLogBase::flush(Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + queue->waitFlush(expected_flushed_index, should_prepare_tables_anyway); } template @@ -257,9 +279,6 @@ void SystemLogBase::add(LogElement element) queue->push(std::move(element)); } -template -void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } - #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 
b87fcf419d3..0d7b04d5c57 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -54,10 +55,19 @@ struct StorageID; class ISystemLog { public: + using Index = int64_t; + virtual String getName() const = 0; - //// force -- force table creation (used for SYSTEM FLUSH LOGS) - virtual void flush(bool force = false) = 0; /// NOLINT + /// Return the index of the latest added log element. That index is no less than the flushed index. + /// The flushed index is the index of the last log element which has been flushed successfully. + /// Thus, all the records whose index is less than the flushed index are flushed already. + virtual Index getLastLogIndex() = 0; + /// Call this method to wake up the flush thread and flush the data in the background. This is a non-blocking call. + virtual void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; + /// Call this method to wait until the logs are flushed up to expected_flushed_index. This is a blocking call. + virtual void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; + virtual void prepareTable() = 0; /// Start the background thread. @@ -97,26 +107,38 @@ struct SystemLogQueueSettings template class SystemLogQueue { - using Index = uint64_t; - public: + using Index = ISystemLog::Index; + explicit SystemLogQueue(const SystemLogQueueSettings & settings_); void shutdown(); // producer methods void push(LogElement && element); - Index notifyFlush(bool should_prepare_tables_anyway); - void waitFlush(Index expected_flushed_up_to); + + Index getLastLogIndex(); + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); + void waitFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash(); + struct PopResult + { + Index last_log_index = 0; + std::vector logs = {}; + bool create_table_force = false; + bool is_shutdown = false; + }; + // consumer methods - Index pop(std::vector& output, bool & should_prepare_tables_anyway, bool & exit_this_thread); - void confirm(Index to_flush_end); + PopResult pop(); + void confirm(Index last_flashed_index); private: + void notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway); + /// Data shared between callers of add()/flush()/shutdown(), and the saving thread std::mutex mutex; @@ -124,22 +146,32 @@ private: // Queue is bounded. But its size is quite large to not block in all normal cases. std::vector queue; + // An always-incrementing index of the first message currently in the queue. // We use it to give a global sequential index to every message, so that we // can wait until a particular message is flushed. This is used to implement // synchronous log flushing for SYSTEM FLUSH LOGS. Index queue_front_index = 0; - // A flag that says we must create the tables even if the queue is empty.
- bool is_force_prepare_tables = false; + // Requested to flush logs up to this index, exclusive - Index requested_flush_up_to = 0; + Index requested_flush_index = std::numeric_limits::min(); // Flushed log up to this index, exclusive - Index flushed_up_to = 0; - // Logged overflow message at this queue front index - Index logged_queue_full_at_index = -1; + Index flushed_index = 0; + + // The same logic applies to preparing tables: if requested_prepare_tables > prepared_tables, we need to do the prepare, + // except that the initial prepared_tables is -1. + // This is due to the following difference: when no logs have been written and we call flush logs, + // the state becomes requested_flush_index = 0 and flushed_index = 0 -- we do not want to do anything, + // but if we need to prepare tables, it becomes requested_prepare_tables = 0 and prepared_tables = -1, + // so we trigger the background thread and do the prepare. + Index requested_prepare_tables = std::numeric_limits::min(); + Index prepared_tables = -1; + + size_t ignored_logs = 0; bool is_shutdown = false; + std::condition_variable confirm_event; std::condition_variable flush_event; const SystemLogQueueSettings settings; @@ -150,6 +182,7 @@ template class SystemLogBase : public ISystemLog { public: + using Index = ISystemLog::Index; using Self = SystemLogBase; explicit SystemLogBase( @@ -163,15 +196,16 @@ public: */ void add(LogElement element); + Index getLastLogIndex() override; + + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. - void flush(bool force) override; + void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash() override; - /// Non-blocking flush data in the buffer to disk.
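Taken together, the interface above replaces the old flush(bool force) with an explicit index handshake: snapshot the last accepted record with getLastLogIndex(), wake the saving thread with the non-blocking notifyFlush(), then block in flush() until that index is confirmed. A minimal sketch of how a caller such as the SYSTEM FLUSH LOGS handler can drive this, assuming only the ISystemLog declarations shown above (the helper function itself is illustrative, not part of this patch):

```cpp
#include <vector>

void flushAllLogs(const std::vector<ISystemLog *> & logs)
{
    std::vector<ISystemLog::Index> snapshots;
    snapshots.reserve(logs.size());

    // Phase 1: snapshot each log's last accepted index and wake its saving
    // thread without blocking; tables are prepared even for empty queues.
    for (auto * log : logs)
    {
        snapshots.push_back(log->getLastLogIndex());
        log->notifyFlush(snapshots.back(), /* should_prepare_tables_anyway */ true);
    }

    // Phase 2: wait until every snapshotted index has been confirmed by the
    // corresponding saving thread (or its flush timeout fires).
    for (size_t i = 0; i < logs.size(); ++i)
        logs[i]->flush(snapshots[i], /* should_prepare_tables_anyway */ true);
}
```

Separating the notify step from the wait step lets all queues drain in parallel instead of serializing on one log's flush (and timeout) at a time.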
- void notifyFlush(bool force); - String getName() const override { return LogElement::name(); } static const char * getDefaultOrderBy() { return "event_date, event_time"; } diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 6a0090130a3..e3f8882850f 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -32,6 +32,8 @@ #cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN +#cmakedefine01 USE_QPL +#cmakedefine01 USE_QATLIB #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP @@ -59,7 +61,7 @@ #cmakedefine01 USE_SKIM #cmakedefine01 USE_PRQL #cmakedefine01 USE_ULID -#cmakedefine01 FIU_ENABLE +#cmakedefine01 USE_LIBFIU #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index 6793d567aca..3a1304e2d94 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -174,19 +174,19 @@ struct Dictionary { switch (attribute.type) { - case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; case AttributeUnderlyingTypeTest::String: { - const auto & string = value.get(); + const auto & string = value.safeGet(); auto & string_ref = std::get>(attribute.arrays)[idx]; const auto & null_value_ref = std::get(attribute.null_values); diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 
f1b5b24e866..c82ee861a6f 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -1,7 +1,3 @@ -#ifdef ENABLE_QPL_COMPRESSION - -#include -#include #include #include #include @@ -11,6 +7,10 @@ #include #include #include +#include +#include + +#if USE_QPL #include "libaccel_config.h" diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index 86fd9051bd8..d9abc0fb7e0 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -4,6 +4,11 @@ #include #include #include + +#include "config.h" + +#if USE_QPL + #include namespace Poco @@ -117,3 +122,4 @@ private: }; } +#endif diff --git a/src/Compression/CompressionCodecZSTDQAT.cpp b/src/Compression/CompressionCodecZSTDQAT.cpp index 5a4ef70a30a..e19b7e4a001 100644 --- a/src/Compression/CompressionCodecZSTDQAT.cpp +++ b/src/Compression/CompressionCodecZSTDQAT.cpp @@ -1,4 +1,6 @@ -#ifdef ENABLE_ZSTD_QAT_CODEC +#include "config.h" + +#if USE_QATLIB #include #include @@ -6,6 +8,7 @@ #include #include #include + #include #include diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 2e7aa0d086f..ac00f571568 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -1,20 +1,20 @@ -#include "config.h" - #include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include +#include +#include +#include #include +#include "config.h" + namespace DB { @@ -175,11 +175,11 @@ void registerCodecNone(CompressionCodecFactory & factory); void registerCodecLZ4(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL void registerCodecDeflateQpl(CompressionCodecFactory & factory); #endif @@ -198,7 +198,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecNone(*this); registerCodecLZ4(*this); registerCodecZSTD(*this); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB registerCodecZSTDQAT(*this); #endif registerCodecLZ4HC(*this); @@ -209,7 +209,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index af681cd5639..c2bcf6ec651 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -49,7 +49,7 @@ ExternalTableDataPtr BaseExternalTable::getData(ContextPtr context) { initReadBuffer(); initSampleBlock(); - auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").get()); + auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").safeGet()); auto data = std::make_unique(); data->pipe = std::make_unique(); diff --git a/src/Core/Field.h b/src/Core/Field.h index f1bb4a72b0d..ba8c66580ad 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -457,15 +457,6 @@ public: std::string_view getTypeName() const; bool isNull() 
const { return which == Types::Null; } - template - NearestFieldType> & get(); - - template - const auto & get() const - { - auto * mutable_this = const_cast *>(this); - return mutable_this->get(); - } bool isNegativeInfinity() const { return which == Types::Null && get().isNegativeInfinity(); } bool isPositiveInfinity() const { return which == Types::Null && get().isPositiveInfinity(); } @@ -681,6 +672,25 @@ private: Types::Which which; + /// This function is prone to type punning and should never be used outside of the Field class; + /// whenever it is used within this class, the stored type should be checked in advance. + template + NearestFieldType> & get() + { + // Before storing the value in the Field, we static_cast it to the field + // storage type, so here we return the value of storage type as well. + // Otherwise, it is easy to make a mistake of reinterpret_casting the stored + // value to a different and incompatible type. + // For example, a Float32 value is stored as Float64, and it is incorrect to + // return a reference to this value as Float32. + return *reinterpret_cast>*>(&storage); + } + + template + NearestFieldType> & get() const + { + return const_cast(this)->get(); + } /// Assuming there was no allocated state or it was deallocated (see destroy). template @@ -866,48 +876,21 @@ constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) || t == Field::Types::Bool; } -// Field value getter with type checking in debug builds. -template -NearestFieldType> & Field::get() -{ - // Before storing the value in the Field, we static_cast it to the field - // storage type, so here we return the value of storage type as well. - // Otherwise, it is easy to make a mistake of reinterpret_casting the stored - // value to a different and incompatible type. - // For example, a Float32 value is stored as Float64, and it is incorrect to - // return a reference to this value as Float32. - using StoredType = NearestFieldType>; - -#ifndef NDEBUG - // Disregard signedness when converting between int64 types. - constexpr Field::Types::Which target = TypeToEnum::value; - if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which)) && target != Field::Types::IPv4) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid Field get from type {} to type {}", which, target); -#endif - - StoredType * MAY_ALIAS ptr = reinterpret_cast(&storage); - - return *ptr; -} - - template auto & Field::safeGet() { const Types::Which target = TypeToEnum>>::value; - /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet().
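With get() made private, safeGet() becomes the only public typed accessor, and unlike the removed get() its type check runs in release builds too, not only under !NDEBUG. A small illustration of the conversion rules spelled out in the hunk that follows; the values and variable names are hypothetical:

```cpp
Field f = UInt64(42);
UInt64 u = f.safeGet<UInt64>();  // OK: exact match with the stored type
Int64 i = f.safeGet<Int64>();    // OK: UInt64 <-> Int64 is allowed

Field b{true};                   // Types::Bool, stored as UInt64
UInt64 x = b.safeGet<UInt64>();  // OK: Bool may be read as UInt64 or Int64

// f.safeGet<String>();          // throws Exception(ErrorCodes::BAD_GET, ...)
```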
- if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) - throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), target); + /// bool is stored as uint64, will be returned as UInt64 when requested as bool or UInt64, as Int64 when requested as Int64 + /// also allow UInt64 <-> Int64 conversion + if (target != which && + !(which == Field::Types::Bool && (target == Field::Types::UInt64 || target == Field::Types::Int64)) && + !(isInt64OrUInt64FieldType(which) && isInt64OrUInt64FieldType(target))) + throw Exception(ErrorCodes::BAD_GET, "Bad get: has {}, requested {}", getTypeName(), target); return get(); } - template requires not_field_or_bool_or_stringlike Field::Field(T && rhs) diff --git a/src/Core/Range.cpp b/src/Core/Range.cpp index 956b96653a1..1a5ce1e012e 100644 --- a/src/Core/Range.cpp +++ b/src/Core/Range.cpp @@ -62,27 +62,27 @@ void Range::shrinkToIncludedIfPossible() { if (left.isExplicit() && !left_included) { - if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::UInt64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } - if (left.getType() == Field::Types::Int64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::Int64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } } if (right.isExplicit() && !right_included) { - if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::UInt64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } - if (right.getType() == Field::Types::Int64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::Int64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 9c9c9c1db00..45bd2b9eb42 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -118,7 +118,7 @@ void Settings::set(std::string_view name, const Field & value) { if (value.getType() != Field::Types::Which::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName()); - applyCompatibilitySetting(value.get()); + applyCompatibilitySetting(value.safeGet()); } /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4dda05bc54b..cbb62227da0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -676,6 +676,7 @@ class IColumn; M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \ M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ + M(String, query_cache_tag, "", "A string which acts as a label for query cache entries. 
The same queries with different tags are considered different by the query cache.", 0) \ M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ @@ -921,6 +922,7 @@ class IColumn; M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\ \ M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 7cfc119f734..d2a6f0423b2 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -80,8 +80,10 @@ static std::initializer_list(f.get()); + return stringToNumber(f.safeGet()); } else if (f.getType() == Field::Types::UInt64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Int64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Bool) { - return T(f.get()); + return T(f.safeGet()); } else if (f.getType() == Field::Types::Float64) { - Float64 x = f.get(); + Float64 x = f.safeGet(); if constexpr (std::is_floating_point_v) { return T(x); @@ -120,7 +120,7 @@ namespace if (f.getType() == Field::Types::String) { /// Allow to parse Map from string field. For the convenience. 
- const auto & str = f.get(); + const auto & str = f.safeGet(); return stringToMap(str); } @@ -218,7 +218,7 @@ namespace UInt64 fieldToMaxThreads(const Field & f) { if (f.getType() == Field::Types::String) - return stringToMaxThreads(f.get()); + return stringToMaxThreads(f.safeGet()); else return fieldToNumber(f); } diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 5541cc19653..3127a5ef36d 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -100,7 +100,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) return current_value; }; - UInt64 max_threads = get_current_value("max_threads").get(); + UInt64 max_threads = get_current_value("max_threads").safeGet(); UInt64 max_threads_max_value = 256 * getNumberOfPhysicalCPUCores(); if (max_threads > max_threads_max_value) { @@ -120,7 +120,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) "input_format_parquet_max_block_size"}; for (auto const & setting : block_rows_settings) { - if (auto block_size = get_current_value(setting).get(); + if (auto block_size = get_current_value(setting).safeGet(); block_size > max_sane_block_rows_size) { if (log) @@ -129,7 +129,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) } } - if (auto max_block_size = get_current_value("max_block_size").get(); max_block_size == 0) + if (auto max_block_size = get_current_value("max_block_size").safeGet(); max_block_size == 0) { if (log) LOG_WARNING(log, "Sanity check: 'max_block_size' cannot be 0. Set to default value {}", DEFAULT_BLOCK_SIZE); diff --git a/src/Core/examples/field.cpp b/src/Core/examples/field.cpp index 110e11d0cb1..3064290e127 100644 --- a/src/Core/examples/field.cpp +++ b/src/Core/examples/field.cpp @@ -37,7 +37,7 @@ int main(int argc, char ** argv) std::cerr << applyVisitor(to_string, field) << std::endl; } - field.get().push_back(field); + field.safeGet().push_back(field); std::cerr << applyVisitor(to_string, field) << std::endl; std::cerr << (field < field2) << std::endl; diff --git a/src/Core/tests/gtest_field.cpp b/src/Core/tests/gtest_field.cpp index 5585442d835..7e778be9575 100644 --- a/src/Core/tests/gtest_field.cpp +++ b/src/Core/tests/gtest_field.cpp @@ -8,31 +8,31 @@ GTEST_TEST(Field, FromBool) { Field f{false}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f{true}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } { Field f; f = false; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f; f = true; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } } @@ -42,15 +42,15 @@ GTEST_TEST(Field, Move) Field f; f = Field{String{"Hello, world (1)"}}; - ASSERT_EQ(f.get(), "Hello, world (1)"); + ASSERT_EQ(f.safeGet(), "Hello, world (1)"); f = Field{String{"Hello, world (2)"}}; - ASSERT_EQ(f.get(), "Hello, world (2)"); + ASSERT_EQ(f.safeGet(), "Hello, world (2)"); f = Field{Array{Field{String{"Hello, world (3)"}}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (3)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (3)"); f = String{"Hello, world (4)"}; - 
ASSERT_EQ(f.get(), "Hello, world (4)"); + ASSERT_EQ(f.safeGet(), "Hello, world (4)"); f = Array{Field{String{"Hello, world (5)"}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (5)"); f = Array{String{"Hello, world (6)"}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (6)"); } diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index a4cd3b9e511..1facaaab0d6 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -129,7 +129,7 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const Field DataTypeAggregateFunction::getDefault() const { Field field = AggregateFunctionStateData(); - field.get().name = getName(); + field.safeGet().name = getName(); AlignedBuffer place_buffer(function->sizeOfData(), function->alignOfData()); AggregateDataPtr place = place_buffer.data(); @@ -138,7 +138,7 @@ Field DataTypeAggregateFunction::getDefault() const try { - WriteBufferFromString buffer_from_field(field.get().data); + WriteBufferFromString buffer_from_field(field.safeGet().data); function->serialize(place, buffer_from_field, version); } catch (...) diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index a1b1f8325f0..e0430272479 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -71,10 +71,10 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet() == 0 || literal->value.safeGet() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); - return std::make_shared(literal->value.get()); + return std::make_shared(literal->value.safeGet()); } void registerDataTypeDynamic(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 08e0c0d2045..b9a5a1a5a68 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -122,12 +122,12 @@ Field DataTypeEnum::castToName(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - this->getValue(value_or_name.get()); /// Check correctness - return value_or_name.get(); + this->getValue(value_or_name.safeGet()); /// Check correctness + return value_or_name.safeGet(); } else if (value_or_name.getType() == Field::Types::Int64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); return this->getNameForValue(static_cast(value)).toString(); } @@ -141,12 +141,12 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - return this->getValue(value_or_name.get()); + return this->getValue(value_or_name.safeGet()); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); this->getNameForValue(static_cast(value)); /// Check correctness return value; @@ -220,7 +220,7 @@ static void 
autoAssignNumberForEnum(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - literal_child_assign_num = value_literal->value.get(); + literal_child_assign_num = value_literal->value.safeGet(); } assign_number_child.emplace_back(child); } @@ -269,8 +269,8 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - const String & field_name = name_literal->value.get(); - const auto value = value_literal->value.get(); + const String & field_name = name_literal->value.safeGet(); + const auto value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value {} for element '{}' exceeds range of {}", @@ -302,7 +302,7 @@ static DataTypePtr create(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - Int64 value = value_literal->value.get(); + Int64 value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) return createExact(arguments); diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 080ff8826a5..63d5245287f 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -51,11 +51,11 @@ static DataTypePtr create(const ASTPtr & arguments) "FixedString data type family must have exactly one argument - size in bytes"); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.safeGet() == 0) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "FixedString data type family must have a number (positive integer) as its argument"); - return std::make_shared(argument->value.get()); + return std::make_shared(argument->value.safeGet()); } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 91b9bfcb2a5..5fd69688194 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -69,7 +69,7 @@ static DataTypePtr create(const ASTPtr & arguments) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Object data type family must have a const string as its schema name parameter"); - return std::make_shared(literal->value.get(), is_nullable); + return std::make_shared(literal->value.safeGet(), is_nullable); } void registerDataTypeObject(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index a427fd0717a..1d8f7711de1 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -80,14 +80,14 @@ static DataTypePtr create(const ASTPtr & arguments) const auto * precision_arg = arguments->children[0]->as(); if (!precision_arg || precision_arg->value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument precision is invalid"); - precision = precision_arg->value.get(); + precision = precision_arg->value.safeGet(); if (arguments->children.size() == 2) { const auto * scale_arg = arguments->children[1]->as(); if (!scale_arg || 
!isInt64OrUInt64FieldType(scale_arg->value.getType())) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument scale is invalid"); - scale = scale_arg->value.get(); + scale = scale_arg->value.safeGet(); } } @@ -107,7 +107,7 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument"); UInt64 precision = DecimalUtils::max_precision; - UInt64 scale = scale_arg->value.get(); + UInt64 scale = scale_arg->value.safeGet(); return createDecimal(precision, scale); } diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 356e609e77a..fe8a5ede102 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -135,7 +135,7 @@ Array createEmptyArrayField(size_t num_dimensions) for (size_t i = 1; i < num_dimensions; ++i) { current_array->push_back(Array()); - current_array = ¤t_array->back().get(); + current_array = ¤t_array->back().safeGet(); } return array; diff --git a/src/DataTypes/Serializations/JSONDataParser.cpp b/src/DataTypes/Serializations/JSONDataParser.cpp index 56641424396..0f74815f5b4 100644 --- a/src/DataTypes/Serializations/JSONDataParser.cpp +++ b/src/DataTypes/Serializations/JSONDataParser.cpp @@ -131,7 +131,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.size() == ctx.current_size) @@ -154,7 +154,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.empty()) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 55f7641e058..41b198890e4 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -16,14 +16,14 @@ namespace DB void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const AggregateFunctionStateData & state = field.get(); + const AggregateFunctionStateData & state = field.safeGet(); writeBinary(state.data, ostr); } void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = AggregateFunctionStateData(); - AggregateFunctionStateData & s = field.get(); + AggregateFunctionStateData & s = field.safeGet(); readBinary(s.data, istr); s.name = type_name; } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index b7d43332085..0a9c4529e23 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -29,7 +29,7 @@ static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40; void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const Array & a = field.get(); + const Array & a = field.safeGet(); 
writeVarUInt(a.size(), ostr); for (const auto & i : a) { @@ -51,7 +51,7 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con settings.binary.max_binary_string_size); field = Array(); - Array & arr = field.get(); + Array & arr = field.safeGet(); arr.reserve(size); for (size_t i = 0; i < size; ++i) nested->deserializeBinary(arr.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 49dc042e872..8927f949368 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -13,7 +13,7 @@ namespace DB template void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - FieldType x = field.get>(); + FieldType x = field.safeGet>(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index f919dc16d33..688c71792fa 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -28,7 +28,7 @@ static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30; void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const String & s = field.get(); + const String & s = field.safeGet(); ostr.write(s.data(), std::min(s.size(), n)); if (s.size() < n) for (size_t i = s.size(); i < n; ++i) @@ -39,7 +39,7 @@ void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(n); istr.readStrict(s.data(), n); } diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index dfcd24aff58..c1beceb4533 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -125,7 +125,7 @@ bool SerializationIP::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB template void SerializationIP::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - IPv x = field.get(); + IPv x = field.safeGet(); if constexpr (std::is_same_v) writeBinary(x, ostr); else diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 0bef3c7d79d..c722b3ac7a1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -40,7 +40,7 @@ static IColumn & extractNestedColumn(IColumn & column) void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & map = field.get(); + const auto & map = field.safeGet(); writeVarUInt(map.size(), ostr); for (const auto & elem : map) { @@ -63,7 +63,7 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const size, settings.binary.max_binary_string_size); field = Map(); - Map & map = field.get(); + Map & map = field.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) { diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp 
b/src/DataTypes/Serializations/SerializationNumber.cpp index bdb4dfc6735..bfc13af8ca3 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -169,7 +169,7 @@ template void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { /// ColumnVector::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64 - typename ColumnVector::ValueType x = static_cast::ValueType>(field.get()); + typename ColumnVector::ValueType x = static_cast::ValueType>(field.safeGet()); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e523d0d745..ac5d4e3e128 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const String & s = field.get(); + const String & s = field.safeGet(); if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, @@ -59,7 +59,7 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co settings.binary.max_binary_string_size); field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(size); istr.readStrict(s.data(), size); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 7a5227ca752..594a23ab507 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -34,7 +34,7 @@ static inline const IColumn & extractElementColumn(const IColumn & column, size_ void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & tuple = field.get(); + const auto & tuple = field.safeGet(); for (size_t element_index = 0; element_index < elems.size(); ++element_index) { const auto & serialization = elems[element_index]; @@ -47,7 +47,7 @@ void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr, con const size_t size = elems.size(); field = Tuple(); - Tuple & tuple = field.get(); + Tuple & tuple = field.safeGet(); tuple.reserve(size); for (size_t i = 0; i < size; ++i) elems[i]->deserializeBinary(tuple.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 58178a896dc..f18466ad8ad 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -137,7 +137,7 @@ bool SerializationUUID::tryDeserializeTextCSV(IColumn & column, ReadBuffer & ist void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - UUID x = field.get(); + UUID x = field.safeGet(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 84a52d4affb..9a632bd381b 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -55,7 +55,7 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const char * argume } } - return 
argument->value.get(); + return argument->value.safeGet(); } static DataTypePtr create(const ASTPtr & arguments) diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 67bce915168..b91aa84ecd3 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -183,7 +183,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (name->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.safeGet()); if (!maybe_qualified_name) return; @@ -194,7 +194,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name) return; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 38e100e2470..7556223b30e 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -180,7 +180,7 @@ namespace if (database_name_field && table_name_field) { - QualifiedTableName qualified_name{database_name_field->get(), table_name_field->get()}; + QualifiedTableName qualified_name{database_name_field->safeGet(), table_name_field->safeGet()}; if (!qualified_name.database.empty() && !qualified_name.table.empty()) { auto new_qualified_name = data.renaming_map.getNewTableName(qualified_name); @@ -207,7 +207,7 @@ namespace if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name || maybe_qualified_name->database.empty() || maybe_qualified_name->table.empty()) return; @@ -247,7 +247,7 @@ namespace if (!literal || (literal->value.getType() != Field::Types::String)) return; - auto database_name = literal->value.get(); + auto database_name = literal->value.safeGet(); if (database_name.empty()) return; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index fe00c1c60aa..f4aa925d6dd 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -833,8 +833,8 @@ void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStora if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) return; - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); + String maybe_path = arg1->value.safeGet(); + String maybe_replica = arg2->value.safeGet(); /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. /// Let's ensure that some macros are used. 
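Everything from Core/ through Databases/ above is the same mechanical migration from get() to safeGet(), trading the old debug-only check for one that also runs in release builds. A hypothetical call site (not taken from this patch) shows the resulting pattern for AST literals:

```cpp
#include <optional>

// Hypothetical helper: extract a string literal from an AST node the way the
// call sites above now do it.
std::optional<String> tryGetStringLiteral(const ASTLiteral & literal)
{
    // Guard on the stored type first, as the surrounding diffs do ...
    if (literal.value.getType() != Field::Types::String)
        return std::nullopt;

    // ... then read it with safeGet(): on a type mismatch this throws
    // BAD_GET in every build type instead of silently reinterpreting the
    // storage, as the removed get() could in release builds.
    return literal.value.safeGet<String>();
}
```

Since safeGet() keeps the UInt64/Int64/Bool allowances shown in Field.h, the bulk of this migration is a pure rename at each call site.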
diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 04b4070d5af..1364e9ae2b2 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -736,11 +736,11 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) - casted_column->insertValue(static_cast(value.template get())); + casted_column->insertValue(static_cast(value.template safeGet())); } }; @@ -776,17 +776,17 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { if (value.getType() == Field::Types::UInt64) - casted_int32_column->insertValue(static_cast(value.get())); + casted_int32_column->insertValue(static_cast(value.safeGet())); else if (value.getType() == Field::Types::Int64) { /// For MYSQL_TYPE_INT24 - const Int32 & num = static_cast(value.get()); + const Int32 & num = static_cast(value.safeGet()); casted_int32_column->insertValue(num & 0x800000 ? num | 0xFF000000 : num); } else @@ -798,7 +798,7 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) @@ -812,12 +812,12 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_fixed_string_column->insertData(data.data(), data.size()); } } @@ -864,7 +864,7 @@ static inline size_t onUpdateData(const Row & rows_data, Block & buffer, size_t { writeable_rows_mask[index + 1] = true; writeable_rows_mask[index] = differenceSortingKeys( - rows_data[index].get(), rows_data[index + 1].get(), sorting_columns_index); + rows_data[index].safeGet(), rows_data[index + 1].safeGet(), sorting_columns_index); } for (size_t column = 0; column < buffer.columns() - 2; ++column) diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp index 11299c5b8b1..6f1ba26ee33 100644 --- a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -281,12 +281,12 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(write_event->table, "a"); ASSERT_EQ(write_event->rows.size(), 1); ASSERT_EQ(write_event->rows[0].getType(), Field::Types::Tuple); - auto row_data = write_event->rows[0].get(); + auto row_data = write_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); 
+ ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; @@ -342,18 +342,18 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(update_event->table, "a"); ASSERT_EQ(update_event->rows.size(), 2); ASSERT_EQ(update_event->rows[0].getType(), Field::Types::Tuple); - row_data = update_event->rows[0].get(); + row_data = update_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); - row_data = update_event->rows[1].get(); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); + row_data = update_event->rows[1].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 2u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 2u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 47f99bd1093..781822533e9 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -395,13 +395,13 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); container.back() = inserted_value; } else { - container.back() = static_cast(column_value.get()); + container.back() = static_cast(column_value.safeGet()); } }); } @@ -441,7 +441,7 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); if (!cell_was_default) @@ -454,7 +454,7 @@ private: } else { - container[index_to_use] = static_cast(column_value.get()); + container[index_to_use] = static_cast(column_value.safeGet()); } }); } @@ -651,12 +651,12 @@ private: } else if constexpr (std::is_same_v) { - auto & value = default_value.get(); + auto & value = default_value.safeGet(); value_setter(value); } else { - value_setter(default_value.get()); + value_setter(default_value.safeGet()); } } else diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 64fc05e99ab..43fd39640c3 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -345,7 +345,7 @@ public: if (attribute_default_value.isNull()) default_value_is_null = true; else - default_value = static_cast(attribute_default_value.get()); + default_value = static_cast(attribute_default_value.safeGet()); } else { @@ -377,7 +377,7 @@ public: if constexpr (std::is_same_v) { Field field = (*default_values_column)[row]; - return field.get(); + return field.safeGet(); } else if constexpr (std::is_same_v) return default_values_column->getDataAt(row); diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 999160226d9..b0233766741 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -245,7 +245,7 @@ ColumnPtr 
FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -300,7 +300,7 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -701,7 +701,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, return; } - auto & attribute_value = value.get(); + auto & attribute_value = value.safeGet(); auto & container = std::get>(attribute.container); loaded_keys[key] = true; diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d7d50dfb0a6..8768be8e5ec 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -240,7 +240,7 @@ ColumnPtr HashedArrayDictionary::getHierarchy(Colu std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&, this](auto & key) @@ -313,7 +313,7 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierar std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&](auto & key) @@ -581,13 +581,13 @@ void HashedArrayDictionary::blockToAttributes(cons if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert); attribute_container.back() = string_in_arena_reference; } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); attribute_container.back() = value_to_insert; } }; diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3a2b61e5149..7e935fe4855 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -636,7 +636,7 @@ ColumnPtr HashedDictionary::getHierarchy(C std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -710,7 +710,7 @@ ColumnUInt8::Ptr HashedDictionary::isInHie std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -1004,13 +1004,13 @@ void HashedDictionary::blockToAttributes(c if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef arena_value = copyStringInArena(*string_arenas[shard], 
value_to_insert); container.insert({key, arena_value}); } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); container.insert({key, value_to_insert}); } diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index e1119982a34..de532ade26d 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -50,7 +50,7 @@ namespace std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); ColumnPtr key_to_request_column = ColumnVector::create(); auto * key_to_request_column_typed = static_cast *>(key_to_request_column->assumeMutable().get()); @@ -190,7 +190,7 @@ ColumnPtr getKeysHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { @@ -252,7 +252,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 41fafcc162b..4f9e991752f 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -613,14 +613,14 @@ void IPAddressDictionary::calculateBytesAllocated() template void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? T{} : T(null_value.get()); + attribute.null_values = null_value.isNull() ? T{} : T(null_value.safeGet()); attribute.maps.emplace>(); } template <> void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? String() : null_value.get(); + attribute.null_values = null_value.isNull() ? 
String() : null_value.safeGet(); attribute.maps.emplace>(); attribute.string_arena = std::make_unique(); } @@ -976,13 +976,13 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); setAttributeValueImpl(attribute, StringRef{string_in_arena, string.size()}); } else { - setAttributeValueImpl(attribute, static_cast(value.get())); + setAttributeValueImpl(attribute, static_cast(value.safeGet())); } }; diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 46910fa9f6a..c30a6f90e44 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,7 +1,6 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" #include #include @@ -233,7 +232,7 @@ QueryPipeline MongoDBDictionarySource::loadKeys(const Columns & key_columns, con } case AttributeUnderlyingType::String: { - String loaded_str((*key_columns[attribute_index])[row_idx].get()); + String loaded_str((*key_columns[attribute_index])[row_idx].safeGet()); /// Convert string to ObjectID if (key_attribute.is_object_id) { diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index dfc920623e3..ff29ca1f6b8 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -141,7 +141,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsShortCircuitImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_mask.value()); } @@ -149,7 +149,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_value_provider.value()); } @@ -432,16 +432,16 @@ void IPolygonDictionary::getItemsImpl( } else if constexpr (std::is_same_v) { - set_value(default_value.get()); + set_value(default_value.safeGet()); } else if constexpr (std::is_same_v) { - auto default_value_string = default_value.get(); + auto default_value_string = default_value.safeGet(); set_value(default_value_string); } else { - set_value(default_value.get>()); + set_value(default_value.safeGet>()); } } } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index fc6c98990d0..c264b480bcb 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -906,13 +906,13 @@ void RangeHashedDictionary::setAttributeValue(Attribute & a if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); StringRef string_ref = copyStringInArena(string_arena, string); value_to_insert = string_ref; } else { - value_to_insert = static_cast(value.get()); + value_to_insert = static_cast(value.safeGet()); } container.back() = value_to_insert; diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index 1736cdff306..9db639a0ca4 100644 --- 
a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -1,7 +1,6 @@ #include "RedisDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" #include #include @@ -160,7 +159,7 @@ namespace DB if (isInteger(type)) key << DB::toString(key_columns[i]->get64(row)); else if (isString(type)) - key << (*key_columns[i])[row].get(); + key << (*key_columns[i])[row].safeGet(); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of key in Redis dictionary"); } diff --git a/src/Disks/DiskFomAST.cpp b/src/Disks/DiskFomAST.cpp index b2f1280c507..5329ff8748a 100644 --- a/src/Disks/DiskFomAST.cpp +++ b/src/Disks/DiskFomAST.cpp @@ -132,7 +132,7 @@ std::string DiskFomAST::createCustomDisk(const ASTPtr & disk_function_ast, Conte FlattenDiskConfigurationVisitor::Data data{context, attach}; FlattenDiskConfigurationVisitor{data}.visit(ast); - return assert_cast(*ast).value.get(); + return assert_cast(*ast).value.safeGet(); } void DiskFomAST::ensureDiskIsNotCustom(const std::string & disk_name, ContextPtr context) diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 617595c19a2..dbbd728ed72 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -164,7 +164,7 @@ try return {*iterator_data.cached_columns, *format_name}; } - schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFilePath()); continue; } @@ -250,7 +250,7 @@ try if (!names_and_types.empty()) read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } catch (...) { @@ -411,7 +411,7 @@ try throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. You can specify the format manually"); read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } if (format_name && mode == SchemaInferenceMode::DEFAULT) @@ -527,9 +527,9 @@ try } catch (Exception & e) { - auto file_name = read_buffer_iterator.getLastFileName(); - if (!file_name.empty()) - e.addMessage(fmt::format("(in file/uri {})", file_name)); + auto file_path = read_buffer_iterator.getLastFilePath(); + if (!file_path.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_path)); throw; } diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index bb5e068f696..7168e7f0817 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -56,8 +56,8 @@ struct IReadBufferIterator /// Set auto detected format name. virtual void setFormatName(const String & /*format_name*/) {} - /// Get last processed file name for better exception messages. - virtual String getLastFileName() const { return ""; } + /// Get last processed file path for better exception messages. + virtual String getLastFilePath() const { return ""; } /// Return true if method recreateLastReadBuffer is implemented. 
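The rename above, getLastFileName() to getLastFilePath(), threads through the schema-inference error path so that messages carry the full path instead of just the basename. A minimal sketch of how the hook is consumed, assuming the IReadBufferIterator interface from this hunk (the try/catch framing and the iterator variable are illustrative, not literal source):

    try
    {
        // ... run schema inference over the current file ...
    }
    catch (Exception & e)
    {
        // A full path disambiguates equally named files living in
        // different directories or buckets.
        auto file_path = read_buffer_iterator.getLastFilePath();
        if (!file_path.empty())
            e.addMessage(fmt::format("(in file/uri {})", file_path));
        throw;
    }
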
virtual bool supportsLastReadBufferRecreation() const { return false; } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 6f0bdf406db..dfb4b76e5e2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1198,7 +1198,7 @@ struct ToYearImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year = point.get(); + auto year = point.safeGet(); if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; const DateLUTImpl & date_lut = DateLUT::instance("UTC"); @@ -2003,7 +2003,7 @@ struct ToYYYYMMImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year_month = point.get(); + auto year_month = point.safeGet(); auto year = year_month / 100; auto month = year_month % 100; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 306b6395dc5..210bb69e16d 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -101,9 +101,9 @@ private: BucketsType num_buckets; if (buckets_field.getType() == Field::Types::Int64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else if (buckets_field.getType() == Field::Types::UInt64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the second argument of function {}", diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0760b929652 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -117,7 +117,7 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) Field field; named_column.column->get(0, field); - return static_cast(field.get()); + return static_cast(field.safeGet()); } @@ -2604,8 +2604,8 @@ struct ToNumberMonotonicity if (left.isNull() || right.isNull()) return {}; - Float64 left_float = left.get(); - Float64 right_float = right.get(); + Float64 left_float = left.safeGet(); + Float64 right_float = right.safeGet(); if (left_float >= static_cast(std::numeric_limits::min()) && left_float <= static_cast(std::numeric_limits::max()) @@ -2633,11 +2633,11 @@ struct ToNumberMonotonicity const bool left_in_first_half = left.isNull() ? from_is_unsigned - : (left.get() >= 0); + : (left.safeGet() >= 0); const bool right_in_first_half = right.isNull() ? !from_is_unsigned - : (right.get() >= 0); + : (right.safeGet() >= 0); /// Size of type is the same. if (size_of_from == size_of_to) @@ -2675,7 +2675,7 @@ struct ToNumberMonotonicity return {}; /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) + if (divideByRangeOfType(left.safeGet()) != divideByRangeOfType(right.safeGet())) return {}; if (to_is_unsigned) @@ -2683,7 +2683,7 @@ struct ToNumberMonotonicity else { // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. 
- const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); + const bool is_monotonic = (T(left.safeGet()) >= 0) == (T(right.safeGet()) >= 0); return { .is_monotonic = is_monotonic }; } @@ -2707,13 +2707,13 @@ struct ToDateMonotonicity } else if ( ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || (( (left.getType() == Field::Types::Float64 || left.isNull()) && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF)))) || !isNativeNumber(type)) { return {}; @@ -2768,16 +2768,16 @@ struct ToStringMonotonicity if (left.getType() == Field::Types::UInt64 && right.getType() == Field::Types::UInt64) { - return (left.get() == 0 && right.get() == 0) - || (floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? positive : not_monotonic; } if (left.getType() == Field::Types::Int64 && right.getType() == Field::Types::Int64) { - return (left.get() == 0 && right.get() == 0) - || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (left.safeGet() > 0 && right.safeGet() > 0 && floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? 
positive : not_monotonic; } @@ -4673,7 +4673,7 @@ private: return [function_name] ( ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) { - using ColumnEnumType = EnumType::ColumnType; + using ColumnEnumType = typename EnumType::ColumnType; const auto & first_col = arguments.front().column.get(); const auto & first_type = arguments.front().type.get(); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index c35df8ba72d..1eaf0d1609a 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -243,7 +243,7 @@ private: } case MoveType::Index: { - Int64 index = (*arguments[j + 1].column)[row].get(); + Int64 index = (*arguments[j + 1].column)[row].safeGet(); if (!moveToElementByIndex(res_element, static_cast(index), key)) return false; break; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 65d7473b945..ff0cff09c9e 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -701,11 +701,11 @@ ColumnPtr FunctionAnyArityLogical::getConstantResultForNonConstArgum bool constant_value_bool = false; if (field_type == Field::Types::Float64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::Int64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::UInt64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); has_true_constant = has_true_constant || constant_value_bool; has_false_constant = has_false_constant || !constant_value_bool; diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 6b65a5feaec..ed7fe1a5de1 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -500,7 +500,7 @@ inline Scale getScaleArg(const ColumnConst* scale_col) { const auto & scale_field = scale_col->getField(); - Int64 scale64 = scale_field.get(); + Int64 scale64 = scale_field.safeGet(); validateScale(scale64); return scale64; @@ -632,7 +632,7 @@ public: Scale raw_scale = scale64; DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } } } @@ -854,7 +854,7 @@ private: using ValueType = typename Container::value_type; std::vector boundary_values(boundaries.size()); for (size_t i = 0; i < boundaries.size(); ++i) - boundary_values[i] = static_cast(boundaries[i].get()); + boundary_values[i] = static_cast(boundaries[i].safeGet()); ::sort(boundary_values.begin(), boundary_values.end()); boundary_values.erase(std::unique(boundary_values.begin(), boundary_values.end()), boundary_values.end()); diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 77d740803be..88b85c48326 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -267,7 +267,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if 
(tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else @@ -622,7 +627,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index 51542c9cab1..ba0baa35819 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -50,15 +50,15 @@ public: if (checkAndGetDataType(&type)) { - return Transform::FactorTransform::execute(UInt16(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } else { - return Transform::FactorTransform::execute(UInt32(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } diff --git a/src/Functions/IFunctionDateOrDateTime.h b/src/Functions/IFunctionDateOrDateTime.h index 762b79bfafc..899aa2c305d 100644 --- a/src/Functions/IFunctionDateOrDateTime.h +++ b/src/Functions/IFunctionDateOrDateTime.h @@ -72,30 +72,30 @@ public: if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt16(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(Int32(left.get()), *date_lut) - == Transform::FactorTransform::execute(Int32(right.get()), *date_lut) + return Transform::FactorTransform::execute(Int32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(Int32(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt32(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), *date_lut) ? 
is_monotonic : is_not_monotonic; } else { assert(checkAndGetDataType(type_ptr)); - const auto & left_date_time = left.get(); + const auto & left_date_time = left.safeGet(); TransformDateTime64 transformer_left(left_date_time.getScale()); - const auto & right_date_time = right.get(); + const auto & right_date_time = right.safeGet(); TransformDateTime64 transformer_right(right_date_time.getScale()); return transformer_left.execute(left_date_time.getValue(), *date_lut) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp index fb74018b330..84ac0ff08f3 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -46,7 +46,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.first = static_cast(number_ptr->as()->value.get()); + range_indices.first = static_cast(number_ptr->as()->value.safeGet()); if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket) { @@ -63,7 +63,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.second = static_cast(number_ptr->as()->value.get()); + range_indices.second = static_cast(number_ptr->as()->value.safeGet()); } else { diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 3aeac808880..e7c3aebf794 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -77,7 +77,7 @@ struct MultiMatchAllIndicesImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index fda752cbacc..54413cbc1cd 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -91,7 +91,7 @@ struct MultiMatchAnyImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiSearchAllPositionsImpl.h b/src/Functions/MultiSearchAllPositionsImpl.h index cfe60e51bcd..6c2cd215638 100644 --- a/src/Functions/MultiSearchAllPositionsImpl.h +++ b/src/Functions/MultiSearchAllPositionsImpl.h @@ -33,7 +33,7 @@ struct MultiSearchAllPositionsImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index 5b34dbfe601..f1dc9ab9e11 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -45,7 +45,7 @@ struct MultiSearchFirstIndexImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git 
a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index 06bf7aa94d8..4380eeb1b29 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -45,7 +45,7 @@ struct MultiSearchFirstPositionImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 909425f5a93..5c652ddcb74 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -45,7 +45,7 @@ struct MultiSearchImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 3ab9cad1ea7..4439e79e962 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -156,7 +156,7 @@ public: for (size_t j = 0; j < num_needles; ++j) { auto field = col_needle_const_array->getData()[j]; - cutURL(res_data, field.get(), res_offset, cur_res_offset); + cutURL(res_data, field.safeGet(), res_offset, cur_res_offset); } } else diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 227b29d5d9f..81f3f97979b 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -904,10 +904,10 @@ ColumnPtr FunctionArrayElement::executeNumberConst( return nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res_vec->getData(), builder); + col_nested->getData(), col_array->getOffsets(), index.safeGet() - 1, col_res_vec->getData(), builder); } else if (index.getType() == Field::Types::Int64) { @@ -972,14 +972,14 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument auto col_res = ColumnString::create(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (builder) ArrayElementStringImpl::vectorConst( col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -988,7 +988,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -1000,7 +1000,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1009,7 +1009,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), 
col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1046,7 +1046,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( auto res_offsets = ColumnArray::ColumnOffsets::create(); auto res_string_null_map = col_nullable ? ColumnUInt8::create() : nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayStringImpl::vectorConst( @@ -1055,7 +1055,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1068,7 +1068,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1084,7 +1084,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1097,7 +1097,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1153,7 +1153,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( auto & res_offsets = res_array->getOffsets(); NullMap * res_null_map = res_nullable ? 
&res_nullable->getNullMapData() : nullptr; - if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayNumImpl::template vectorConst( @@ -1161,7 +1161,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1172,7 +1172,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1392,12 +1392,12 @@ ColumnPtr FunctionArrayElement::executeGenericConst( auto col_res = col_nested.cloneEmpty(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), index.get() - 1, *col_res, builder); + col_nested, col_array->getOffsets(), index.safeGet() - 1, *col_res, builder); else if (index.getType() == Field::Types::Int64) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -(static_cast(index.get() + 1)), *col_res, builder); + col_nested, col_array->getOffsets(), -(static_cast(index.safeGet() + 1)), *col_res, builder); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); @@ -1789,7 +1789,7 @@ bool FunctionArrayElement::matchKeyToIndexStringConst( using DataColumn = std::decay_t; if (index.getType() != Field::Types::String) return false; - MatcherStringConst matcher{data_column, index.get()}; + MatcherStringConst matcher{data_column, index.safeGet()}; executeMatchKeyToIndex(offsets, matched_idxs, matcher); return true; }); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 86797cb5db0..614b01c2ac8 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -237,7 +237,7 @@ private: } arg.val_column->get(offset + j, temp_val); - ValType value = temp_val.get(); + ValType value = temp_val.safeGet(); if constexpr (op_type == OpTypes::ADD) { diff --git a/src/Functions/getClientHTTPHeader.cpp b/src/Functions/getClientHTTPHeader.cpp index 140f39d03b8..50a6275fc82 100644 --- a/src/Functions/getClientHTTPHeader.cpp +++ b/src/Functions/getClientHTTPHeader.cpp @@ -58,7 +58,7 @@ public: { Field header; source->get(row, header); - if (auto it = client_info.http_headers.find(header.get()); it != client_info.http_headers.end()) + if (auto it = client_info.http_headers.find(header.safeGet()); it != client_info.http_headers.end()) result->insert(it->second); else result->insertDefault(); diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index c4b675fcf6c..14b8b70b22c 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -200,7 +200,7 @@ public: if (value.isNull()) continue; - if (value.get() == 0) + if (value.safeGet() == 0) continue; instruction.condition_always_true = true; diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index bdaf57d65c9..85c342b5e7c 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -145,7 +145,7 @@ private: if (nested_names_field.getType() != Field::Types::Array) return {}; - const auto & 
nested_names_array = nested_names_field.get(); + const auto & nested_names_array = nested_names_field.safeGet(); Names nested_names; nested_names.reserve(nested_names_array.size()); @@ -155,7 +155,7 @@ private: if (nested_name_field.getType() != Field::Types::String) return {}; - nested_names.push_back(nested_name_field.get()); + nested_names.push_back(nested_name_field.safeGet()); } return nested_names; diff --git a/src/IO/S3Common.cpp index a794cdbcf05..59040bf1fea 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -294,21 +294,21 @@ void RequestSettings::finishInit(const DB::Settings & settings, bool validate_se /// to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. /// But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").get() : settings.s3_max_get_rps) + if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").safeGet() : settings.s3_max_get_rps) { size_t default_max_get_burst = settings.s3_max_get_burst ? settings.s3_max_get_burst : (Throttler::default_burst_seconds * max_get_rps); - size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").get() : default_max_get_burst; + size_t max_get_burst = isChanged("max_get_burst") ? get("max_get_burst").safeGet() : default_max_get_burst; get_request_throttler = std::make_shared(max_get_rps, max_get_burst); } - if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").get() : settings.s3_max_put_rps) + if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").safeGet() : settings.s3_max_put_rps) { size_t default_max_put_burst = settings.s3_max_put_burst ? settings.s3_max_put_burst : (Throttler::default_burst_seconds * max_put_rps); - size_t max_put_burst = isChanged("max_put_burts") ? get("max_put_burst").get() : default_max_put_burst; + size_t max_put_burst = isChanged("max_put_burst") ? get("max_put_burst").safeGet() : default_max_put_burst; put_request_throttler = std::make_shared(max_put_rps, max_put_burst); } } diff --git a/src/Interpreters/ActionsVisitor.cpp index e1b7e92ee5d..368eb8174f0 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -131,7 +131,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Invalid type in set. Expected tuple, got {}", String(value.getTypeName())); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); size_t tuple_size = tuple.size(); if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", @@ -233,7 +233,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type of set. Expected tuple, got {}", function_result.getTypeName()); - tuple = &function_result.get(); + tuple = &function_result.safeGet(); } /// Tuple can be represented as a literal in AST. @@ -246,7 +246,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type in set. 
Expected tuple, got {}", literal->value.getTypeName()); - tuple = &literal->value.get(); + tuple = &literal->value.safeGet(); } assert(tuple || func); @@ -332,14 +332,14 @@ Block createBlockForSet( if (type_index == TypeIndex::Tuple) { const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else if (type_index == TypeIndex::Array) { const auto* right_arg_array_type = assert_cast(right_arg_type.get()); - size_t right_arg_array_size = right_arg_value.get().size(); + size_t right_arg_array_size = right_arg_value.safeGet().size(); DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else throw_unsupported_type(right_arg_type); diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index a28c7c1bff3..d59fd35df77 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -210,7 +210,7 @@ private: if (literal_value.getType() != Field::Types::String) continue; - auto dictionary_name = literal_value.get(); + auto dictionary_name = literal_value.safeGet(); auto qualified_dictionary_name = context->getExternalDictionariesLoader().qualifyDictionaryNameWithDatabase(dictionary_name, context); literal_value = qualified_dictionary_name.getFullName(); } diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index a3fe8c2e779..ab926037c67 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -128,7 +128,7 @@ namespace bool isQueryCacheRelatedSetting(const String & setting_name) { - return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); + return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache")) && setting_name != "query_cache_tag"; } class RemoveQueryCacheSettingsMatcher @@ -242,11 +242,18 @@ QueryCache::Key::Key( , expires_at(expires_at_) , is_compressed(is_compressed_) , query_string(queryStringFromAST(ast_)) + , tag(settings.query_cache_tag) { } -QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles +QueryCache::Key::Key( + ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, + const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) + /// ^^ dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 461197cac32..c7ebaf4d26a 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,6 +88,11 @@ public: /// 
SYSTEM.QUERY_CACHE. const String query_string; + /// A tag (namespace) to distinguish multiple entries of the same query. + /// This member currently has no use other than letting SYSTEM.QUERY_CACHE populate the 'tag' column conveniently without having to + /// compute the tag from the query AST. + const String tag; + /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, const String & current_database, @@ -99,7 +104,10 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp index 4f06f345b96..b9f7f37b338 100644 --- a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp +++ b/src/Interpreters/ComparisonTupleEliminationVisitor.cpp @@ -22,7 +22,7 @@ ASTs splitTuple(const ASTPtr & node) if (const auto * literal = node->as(); literal && literal->value.getType() == Field::Types::Tuple) { ASTs result; - const auto & tuple = literal->value.get(); + const auto & tuple = literal->value.safeGet(); for (const auto & child : tuple) result.emplace_back(std::make_shared(child)); return result; diff --git a/src/Interpreters/Context.cpp index 5413b568068..4a08fd5fe5b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -618,7 +619,7 @@ struct ContextSharedPart : boost::noncopyable /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. * Note that part changes at shutdown won't be logged to part log. 
*/ - SHUTDOWN(log, "system logs", system_logs, shutdown()); + SHUTDOWN(log, "system logs", system_logs, flushAndShutdown()); LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); @@ -4255,7 +4256,7 @@ std::shared_ptr Context::getS3QueueLog() const if (!shared->system_logs) return {}; - return shared->system_logs->s3_queue_log; + return shared->system_logs->s3queue_log; } std::shared_ptr Context::getAzureQueueLog() const @@ -4312,13 +4313,13 @@ std::shared_ptr Context::getBlobStorageLog() const return shared->system_logs->blob_storage_log; } -std::vector Context::getSystemLogs() const +SystemLogs Context::getSystemLogs() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; - return shared->system_logs->logs; + return *shared->system_logs; } std::optional Context::getDashboards() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d5e35c3e4b3..3da4f124553 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -48,6 +48,8 @@ namespace DB class ASTSelectQuery; +class SystemLogs; + struct ContextSharedPart; class ContextAccess; class ContextAccessWrapper; @@ -1150,7 +1152,7 @@ public: std::shared_ptr getBackupLog() const; std::shared_ptr getBlobStorageLog() const; - std::vector getSystemLogs() const; + SystemLogs getSystemLogs() const; using Dashboards = std::vector>; std::optional getDashboards() const; diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp index 084bb0a1bb9..220355e0741 100644 --- a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp @@ -45,7 +45,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) if (!identifier || !literal || literal->value.getType() != Field::Types::String) continue; - String regexp = likePatternToRegexp(literal->value.get()); + String regexp = likePatternToRegexp(literal->value.safeGet()); /// Case insensitive. Works with UTF-8 as well. 
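The query-cache hunks above cooperate: isQueryCacheRelatedSetting() selects the settings that are stripped from the query AST before the cache key is computed (so that, e.g., changing a TTL does not create a new entry), and exempting query_cache_tag from that stripping is what lets the tag keep influencing the key, giving two textually identical queries with different tags separate cache entries. A sketch of the predicate after the change, with the rationale spelled out (the comments are mine, not from the source):

    bool isQueryCacheRelatedSetting(const String & setting_name)
    {
        /// query_cache_* settings are removed from the AST so they do not
        /// affect the cache key -- except the tag, whose whole purpose is
        /// to distinguish entries of the same query.
        return (setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"))
            && setting_name != "query_cache_tag";
    }
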
if (is_ilike) regexp = "(?i)" + regexp; @@ -61,7 +61,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) match->arguments->children.push_back(it->second); unique_elems.push_back(std::move(match)); } - it->second->value.get().push_back(regexp); + it->second->value.safeGet().push_back(regexp); } } diff --git a/src/Interpreters/ConvertStringsToEnumVisitor.cpp b/src/Interpreters/ConvertStringsToEnumVisitor.cpp index 7cc95dc521b..d35baa92900 100644 --- a/src/Interpreters/ConvertStringsToEnumVisitor.cpp +++ b/src/Interpreters/ConvertStringsToEnumVisitor.cpp @@ -33,8 +33,8 @@ String makeStringsEnum(const std::set & values) void changeIfArguments(ASTPtr & first, ASTPtr & second) { - String first_value = first->as()->value.get(); - String second_value = second->as()->value.get(); + String first_value = first->as()->value.safeGet(); + String second_value = second->as()->value.safeGet(); std::set values; values.insert(first_value); @@ -59,9 +59,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other) { std::set values; - for (const auto & item : array_to->as()->value.get()) - values.insert(item.get()); - values.insert(other->as()->value.get()); + for (const auto & item : array_to->as()->value.safeGet()) + values.insert(item.safeGet()); + values.insert(other->as()->value.safeGet()); String enum_string = makeStringsEnum(values); @@ -168,7 +168,7 @@ void ConvertStringsToEnumMatcher::visit(ASTFunction & function_node, Data & data if (literal_to->value.getTypeName() != "Array" || literal_other->value.getTypeName() != "String") return; - Array array_to = literal_to->value.get(); + Array array_to = literal_to->value.safeGet(); if (array_to.empty()) return; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fa197d59c13..6e08dd5e2cc 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -538,7 +538,7 @@ void DatabaseReplicatedTask::createSyncedNodeIfNeed(const ZooKeeperPtr & zookeep /// Bool type is really weird, sometimes it's Bool and sometimes it's UInt64... 
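Most of this diff is one mechanical substitution, and the comment above about Bool explains why it matters: Field::get<T>() trusts the caller about the stored type tag, while Field::safeGet<T>() validates the tag first and throws on a mismatch, turning a silent misread into an ordinary exception. A minimal sketch of the difference, with template arguments restored for illustration (the exact exception text is not quoted from the source):

    DB::Field f = DB::String("abc");

    // Unchecked: reinterprets the stored value as UInt64, no validation.
    // UInt64 bad = f.get<UInt64>();

    // Checked: throws, because this Field holds a String, not a UInt64.
    UInt64 checked = f.safeGet<UInt64>();

    // One tolerated mismatch, hinted at by the DDLTask hunk above: a boolean
    // may arrive tagged as either Types::Bool or Types::UInt64, which is why
    // that code asserts on both tags before extracting the value.
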
assert(value.getType() == Field::Types::Bool || value.getType() == Field::Types::UInt64); - if (!value.get()) + if (!value.safeGet()) return; zookeeper->createIfNotExists(getSyncedNodePath(), ""); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a101e5e8f09..8bcb7f18a0f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -690,7 +690,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( throw Exception(ErrorCodes::LOGICAL_ERROR, "Neither default value expression nor type is provided for a column"); if (col_decl.comment) - column.comment = col_decl.comment->as().value.get(); + column.comment = col_decl.comment->as().value.safeGet(); if (col_decl.codec) { @@ -1916,7 +1916,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont if (has_explicit_zk_path_arg) { - String zk_path = create.storage->engine->arguments->children[0]->as()->value.get(); + String zk_path = create.storage->engine->arguments->children[0]->as()->value.safeGet(); Macros::MacroExpansionInfo info; info.table_id.uuid = create.uuid; info.ignore_unknown = true; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index bedd9cb4a80..c820f999e0c 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -332,7 +332,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) if (settings.hasBooleanSetting(change.name)) { - auto value = change.value.get(); + auto value = change.value.safeGet(); if (value > 1) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value {} for setting \"{}\". " "Expected boolean type", value, change.name); @@ -341,7 +341,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) } else { - auto value = change.value.get(); + auto value = change.value.safeGet(); settings.setIntegerSetting(change.name, value); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 7eb487ba7b3..2c579f3b468 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -334,7 +334,7 @@ BlockIO InterpreterKillQueryQuery::execute() for (size_t i = 0; i < moves_block.rows(); ++i) { table_id = StorageID{database_col.getDataAt(i).toString(), table_col.getDataAt(i).toString()}; - auto task_uuid = task_uuid_col[i].get(); + auto task_uuid = task_uuid_col[i].safeGet(); CancellationCode code = CancellationCode::Unknown; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cb42a8abf9c..0c79f4310ce 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1232,7 +1232,7 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery std::shared_ptr collator; if (order_by_elem.getCollation()) - collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); + collator = std::make_shared(order_by_elem.getCollation()->as().value.safeGet()); if (order_by_elem.with_fill) { diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c284acfa308..ef6d1040c5e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -710,14 +710,8 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { 
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - - auto logs = getContext()->getSystemLogs(); - std::vector> commands; - commands.reserve(logs.size()); - for (auto * system_log : logs) - commands.emplace_back([system_log] { system_log->flush(true); }); - - executeCommandsAndThrowIfError(commands); + auto system_logs = getContext()->getSystemLogs(); + system_logs.flush(true); break; } case Type::STOP_LISTEN: diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 5de33b3ed86..24635870e62 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -219,7 +219,7 @@ bool isStorageTouchedByMutations( Block tmp_block; while (executor.pull(tmp_block)); - auto count = (*block.getByName("count()").column)[0].get(); + auto count = (*block.getByName("count()").column)[0].safeGet(); return count != 0; } diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index dd205ae6508..913f9900b77 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -42,13 +42,13 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy if (isDateOrDate32(column.type.get())) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 48c9988b6fc..e9a663d53b0 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -24,7 +24,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v if (literal->value.getType() == Field::Types::Int64 || literal->value.getType() == Field::Types::UInt64) { - value = literal->value.get(); + value = literal->value.safeGet(); return true; } if (literal->value.getType() == Field::Types::Null) @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v { if (type_literal->value.getType() == Field::Types::String) { - const auto & type_str = type_literal->value.get(); + const auto & type_str = type_literal->value.safeGet(); if (type_str == "UInt8" || type_str == "Nullable(UInt8)") return tryExtractConstValueFromCondition(expr_list->children.at(0), value); } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 54515ea072a..86cec8659f5 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -72,7 +72,7 @@ bool shardContains( if (sharding_value.isNull()) return false; - UInt64 value = sharding_value.get(); + 
UInt64 value = sharding_value.safeGet(); const auto shard_num = data.slots[value % data.slots.size()] + 1; return data.shard_info.shard_num == shard_num; } @@ -120,11 +120,20 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d else if (auto * tuple_literal = right->as(); tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple) { - auto & tuple = tuple_literal->value.get(); - std::erase_if(tuple, [&](auto & child) + auto & tuple = tuple_literal->value.safeGet(); + if (tuple.size() > 1) { - return tuple.size() > 1 && !shardContains(child, name, data); - }); + Tuple new_tuple; + + for (auto & child : tuple) + if (shardContains(child, name, data)) + new_tuple.emplace_back(std::move(child)); + + if (new_tuple.empty()) + new_tuple.emplace_back(std::move(tuple.back())); + + tuple_literal->value = std::move(new_tuple); + } } } @@ -159,7 +168,7 @@ public: { if (isTuple(constant->getResultType())) { - const auto & tuple = constant->getValue().get(); + const auto & tuple = constant->getValue().safeGet(); Tuple new_tuple; new_tuple.reserve(tuple.size()); diff --git a/src/Interpreters/ProcessList.h index accb73e12df..248ba947bc1 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -244,6 +244,9 @@ public: /// Same as checkTimeLimit but it never throws [[nodiscard]] bool checkTimeLimitSoft(); + /// Use this when the query has been left running in the background to execute asynchronously + void updateContext(ContextWeakPtr weak_context) { context = std::move(weak_context); } + /// Get the reference for the start of the query. Used to synchronize with other Stopwatches UInt64 getQueryCPUStartTime() { return watch.getStart(); } }; diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp index 4a541c3765a..272e1ac735f 100644 --- a/src/Interpreters/RewriteCountVariantsVisitor.cpp +++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp @@ -53,7 +53,7 @@ void RewriteCountVariantsVisitor::visit(ASTFunction & func) { if (first_arg_literal->value.getType() == Field::Types::UInt64) { - auto constant = first_arg_literal->value.get(); + auto constant = first_arg_literal->value.safeGet(); if (constant == 1 && !context->getSettingsRef().aggregate_functions_null_for_empty) transform = true; } diff --git a/src/Interpreters/Squashing.cpp index 483112df6cb..95b76c60063 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -125,20 +124,32 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl { auto & first_chunk = input_chunks[0]; Columns columns = first_chunk.detachColumns(); + mutable_columns.reserve(columns.size()); for (auto & column : columns) - { mutable_columns.push_back(IColumn::mutate(std::move(column))); - mutable_columns.back()->reserve(rows); - } } + size_t num_columns = mutable_columns.size(); + /// Collect the list of source columns for each column. 
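The IN-tuple pruning rewrite above replaces a std::erase_if whose predicate re-read tuple.size() while elements were being removed; the new version checks the size once, collects the matching elements into a fresh tuple, and always keeps at least one element so the literal never degenerates into an invalid empty IN (). A worked sketch with assumed inputs (shardContains, name, and data stand in for the real callback and visitor state):

    // Suppose this shard matches the values 1 and 3 but not 2.
    Tuple tuple;
    tuple.emplace_back(UInt64(1));
    tuple.emplace_back(UInt64(2));
    tuple.emplace_back(UInt64(3));

    Tuple new_tuple;
    for (auto & child : tuple)
        if (shardContains(child, name, data))
            new_tuple.emplace_back(std::move(child));

    // Nothing matched: keep one element, since "x IN ()" would not be valid.
    if (new_tuple.empty())
        new_tuple.emplace_back(std::move(tuple.back()));

    // new_tuple now holds (1, 3) and replaces the original literal.
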
+ std::vector source_columns_list(num_columns, Columns{}); + for (size_t i = 0; i != num_columns; ++i) + source_columns_list[i].reserve(input_chunks.size() - 1); + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above { - Columns columns = input_chunks[i].detachColumns(); - for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + auto columns = input_chunks[i].detachColumns(); + for (size_t j = 0; j != num_columns; ++j) + source_columns_list[j].emplace_back(std::move(columns[j])); + } + + for (size_t i = 0; i != num_columns; ++i) + { + /// We know all the data we will insert in advance and can make all necessary pre-allocations. + mutable_columns[i]->prepareForSquashing(source_columns_list[i]); + for (auto & source_column : source_columns_list[i]) { - const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + auto column = std::move(source_column); + mutable_columns[i]->insertRangeFrom(*column, 0, column->size()); } } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 572481e6b12..832c39bfaf8 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -49,6 +50,7 @@ #include + namespace DB { @@ -282,85 +284,21 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { - query_log = createSystemLog(global_context, "system", "query_log", config, "query_log", "Contains information about executed queries, for example, start time, duration of processing, error messages."); - query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log", "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing."); - part_log = createSystemLog(global_context, "system", "part_log", config, "part_log", "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data."); - trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log", "Contains stack traces collected by the sampling query profiler."); - crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. 
The table does not exist in the database by default, it is created only when fatal errors occur."); - text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); - metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); - error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); - filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); - filesystem_read_prefetches_log = createSystemLog( - global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); - asynchronous_metric_log = createSystemLog( - global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default)."); - opentelemetry_span_log = createSystemLog( - global_context, "system", "opentelemetry_span_log", config, - "opentelemetry_span_log", "Contains information about trace spans for executed queries."); - query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log", "Contains information about the dependent views executed when running a query, for example, the view type or the execution time."); - zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log", "This table contains information about the parameters of the request to the ZooKeeper server and the response from it."); - session_log = createSystemLog(global_context, "system", "session_log", config, "session_log", "Contains information about all successful and failed login and logout events."); - transactions_info_log = createSystemLog( - global_context, "system", "transactions_info_log", config, "transactions_info_log", "Contains information about all transactions executed on a current server."); - processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); - asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); - backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, 
"blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define CREATE_PUBLIC_MEMBERS(log_type, member, descr) \ + member = createSystemLog(global_context, "system", #member, config, #member, descr); \ + + LIST_OF_ALL_SYSTEM_LOGS(CREATE_PUBLIC_MEMBERS) +#undef CREATE_PUBLIC_MEMBERS +/// NOLINTEND(bugprone-macro-parentheses) - if (query_log) - logs.emplace_back(query_log.get()); - if (query_thread_log) - logs.emplace_back(query_thread_log.get()); - if (part_log) - logs.emplace_back(part_log.get()); - if (trace_log) - logs.emplace_back(trace_log.get()); - if (crash_log) - logs.emplace_back(crash_log.get()); - if (text_log) - logs.emplace_back(text_log.get()); - if (metric_log) - logs.emplace_back(metric_log.get()); - if (error_log) - logs.emplace_back(error_log.get()); - if (asynchronous_metric_log) - logs.emplace_back(asynchronous_metric_log.get()); - if (opentelemetry_span_log) - logs.emplace_back(opentelemetry_span_log.get()); - if (query_views_log) - logs.emplace_back(query_views_log.get()); - if (zookeeper_log) - logs.emplace_back(zookeeper_log.get()); if (session_log) - { - logs.emplace_back(session_log.get()); global_context->addWarningMessage("Table system.session_log is enabled. It's unreliable and may contain garbage. Do not use it for any kind of security monitoring."); - } - if (transactions_info_log) - logs.emplace_back(transactions_info_log.get()); - if (processors_profile_log) - logs.emplace_back(processors_profile_log.get()); - if (filesystem_cache_log) - logs.emplace_back(filesystem_cache_log.get()); - if (filesystem_read_prefetches_log) - logs.emplace_back(filesystem_read_prefetches_log.get()); - if (asynchronous_insert_log) - logs.emplace_back(asynchronous_insert_log.get()); - if (backup_log) - logs.emplace_back(backup_log.get()); - if (s3_queue_log) - logs.emplace_back(s3_queue_log.get()); - if (blob_storage_log) - logs.emplace_back(blob_storage_log.get()); bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { - for (auto & log : logs) + for (auto & log : getAllLogs()) { log->startup(); if (should_prepare) @@ -394,20 +332,54 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf } } - -SystemLogs::~SystemLogs() +std::vector SystemLogs::getAllLogs() const { +#define GET_RAW_POINTERS(log_type, member, descr) \ + (member).get(), \ + + std::vector result = { + LIST_OF_ALL_SYSTEM_LOGS(GET_RAW_POINTERS) + }; +#undef GET_RAW_POINTERS + + auto last_it = std::remove(result.begin(), result.end(), nullptr); + result.erase(last_it, result.end()); + + return result; +} + +void SystemLogs::flush(bool should_prepare_tables_anyway) +{ + auto logs = getAllLogs(); + std::vector logs_indexes(logs.size(), 0); + + for (size_t i = 0; i < logs.size(); ++i) + { + auto last_log_index = logs[i]->getLastLogIndex(); + logs_indexes[i] = last_log_index; + logs[i]->notifyFlush(last_log_index, should_prepare_tables_anyway); + } + + for (size_t i = 0; i < logs.size(); ++i) + logs[i]->flush(logs_indexes[i], should_prepare_tables_anyway); +} + +void SystemLogs::flushAndShutdown() +{ + flush(/* should_prepare_tables_anyway */ false); shutdown(); } void SystemLogs::shutdown() { + auto logs = getAllLogs(); for (auto & log : logs) log->shutdown(); } void SystemLogs::handleCrash() { + auto logs = getAllLogs(); for (auto & log : logs) log->handleCrash(); } @@ -462,33 +434,26 @@ void SystemLog::savingThreadFunction() 
{
    setThreadName("SystemLogFlush");
 
-    std::vector to_flush;
-    bool exit_this_thread = false;
-    while (!exit_this_thread)
+    while (true)
     {
         try
         {
-            // The end index (exclusive, like std end()) of the messages we are
-            // going to flush.
-            uint64_t to_flush_end = 0;
-            // Should we prepare table even if there are no new messages.
-            bool should_prepare_tables_anyway = false;
+            auto result = queue->pop();
 
-            to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread);
-
-            if (to_flush.empty())
+            if (result.is_shutdown)
             {
-                if (should_prepare_tables_anyway)
-                {
-                    prepareTable();
-                    LOG_TRACE(log, "Table created (force)");
-
-                    queue->confirm(to_flush_end);
-                }
+                LOG_TRACE(log, "Terminating");
+                return;
             }
-            else
+
+            if (!result.logs.empty())
             {
-                flushImpl(to_flush, to_flush_end);
+                flushImpl(result.logs, result.last_log_index);
+            }
+            else if (result.create_table_force)
+            {
+                prepareTable();
+                queue->confirm(result.last_log_index);
             }
         }
         catch (...)
@@ -496,7 +461,6 @@ void SystemLog::savingThreadFunction()
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
     }
-    LOG_TRACE(log, "Terminating");
 }
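Two patterns in this refactor deserve a note. First, the saving thread above now receives everything from a single queue->pop() call as one result object instead of three out-parameters plus a sentinel flag. A compact, self-contained imitation of that control flow; the field names follow the diff, but the queue itself is simulated, so this is not the actual SystemLogQueue interface:

```cpp
#include <cstdint>
#include <deque>
#include <iostream>
#include <string>
#include <vector>

// Everything the saving thread learns from one pop(), folded into a struct.
struct PopResult
{
    uint64_t last_log_index = 0;
    std::vector<std::string> logs;
    bool create_table_force = false;
    bool is_shutdown = false;
};

int main()
{
    // A canned pop sequence: one batch, one forced table creation, shutdown.
    std::deque<PopResult> queue = {
        {.last_log_index = 2, .logs = {"a", "b"}},
        {.last_log_index = 2, .create_table_force = true},
        {.is_shutdown = true},
    };

    while (true)
    {
        PopResult result = queue.front();
        queue.pop_front();

        if (result.is_shutdown)
        {
            std::cout << "terminating\n";
            return 0; // single exit point replaces the exit_this_thread flag
        }

        if (!result.logs.empty())
            std::cout << "flush up to index " << result.last_log_index << '\n';
        else if (result.create_table_force)
            std::cout << "prepare table, confirm " << result.last_log_index << '\n';
    }
}
```

Second, the SystemLog.h diff below replaces twenty-odd hand-maintained forward declarations and members with an X-macro: the table of logs is written once and expanded several times, so the declarations, the members, and the iteration code can no longer drift apart. A minimal sketch of the technique with a hypothetical two-entry list; the real list and the createSystemLog calls are in the diff itself:

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// The list is the single source of truth; illustrative entries only.
#define LIST_OF_LOGS(M) \
    M(QueryLog, query_log, "Executed queries") \
    M(TraceLog, trace_log, "Sampled stack traces")

// Expansion 1: one class per entry (simplified to a struct here).
#define DECLARE_CLASS(log_type, member, descr) \
    struct log_type { std::string description = descr; };
LIST_OF_LOGS(DECLARE_CLASS)
#undef DECLARE_CLASS

struct SystemLogs
{
// Expansion 2: one shared_ptr member per entry.
#define DECLARE_MEMBER(log_type, member, descr) std::shared_ptr<log_type> member;
    LIST_OF_LOGS(DECLARE_MEMBER)
#undef DECLARE_MEMBER

    // Expansion 3: code that visits every member, also generated.
    std::vector<std::string> descriptions() const
    {
        std::vector<std::string> result;
#define COLLECT(log_type, member, descr) if (member) result.push_back(member->description);
        LIST_OF_LOGS(COLLECT)
#undef COLLECT
        return result;
    }
};

int main()
{
    SystemLogs logs;
    logs.query_log = std::make_shared<QueryLog>();
    for (const auto & d : logs.descriptions())
        std::cout << d << '\n'; // prints: Executed queries
}
```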
diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h
index 0ac468b15ec..9e1af3578bd 100644
--- a/src/Interpreters/SystemLog.h
+++ b/src/Interpreters/SystemLog.h
@@ -5,6 +5,32 @@
 #include
 #include
+#include
+
+#define LIST_OF_ALL_SYSTEM_LOGS(M) \
+    M(QueryLog, query_log, "Contains information about executed queries, for example, start time, duration of processing, error messages.") \
+    M(QueryThreadLog, query_thread_log, "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing.") \
+    M(PartLog, part_log, "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data.") \
+    M(TraceLog, trace_log, "Contains stack traces collected by the sampling query profiler.") \
+    M(CrashLog, crash_log, "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur.") \
+    M(TextLog, text_log, "Contains logging entries which are normally written to a log file or to stdout.") \
+    M(MetricLog, metric_log, "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk.") \
+    M(ErrorLog, error_log, "Contains history of error values from table system.errors, periodically flushed to disk.") \
+    M(FilesystemCacheLog, filesystem_cache_log, "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem.") \
+    M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem.") \
+    M(ObjectStorageQueueLog, s3queue_log, "Contains logging entries with information about files processed by the S3Queue engine.") \
+    M(ObjectStorageQueueLog, azure_queue_log, "Contains logging entries with information about files processed by the S3Queue engine.") \
+    M(AsynchronousMetricLog, asynchronous_metric_log, "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default).") \
+    M(OpenTelemetrySpanLog, opentelemetry_span_log, "Contains information about trace spans for executed queries.") \
+    M(QueryViewsLog, query_views_log, "Contains information about the dependent views executed when running a query, for example, the view type or the execution time.") \
+    M(ZooKeeperLog, zookeeper_log, "This table contains information about the parameters of the request to the ZooKeeper server and the response from it.") \
+    M(SessionLog, session_log, "Contains information about all successful and failed login and logout events.") \
+    M(TransactionsInfoLog, transactions_info_log, "Contains information about all transactions executed on a current server.") \
+    M(ProcessorsProfileLog, processors_profile_log, "Contains profiling information on processors level (building blocks for a pipeline for query execution).") \
+    M(AsynchronousInsertLog, asynchronous_insert_log, "Contains a history for all asynchronous inserts executed on current server.") \
+    M(BackupLog, backup_log, "Contains logging entries with the information about BACKUP and RESTORE operations.") \
+    M(BlobStorageLog, blob_storage_log, "Contains logging entries with information about various blob storage operations such as uploads and deletes.") \
+
 namespace DB
 {
@@ -34,71 +60,37 @@ namespace DB
 };
 */
 
-class QueryLog;
-class QueryThreadLog;
-class PartLog;
-class TextLog;
-class TraceLog;
-class CrashLog;
-class ErrorLog;
-class MetricLog;
-class AsynchronousMetricLog;
-class OpenTelemetrySpanLog;
-class QueryViewsLog;
-class ZooKeeperLog;
-class SessionLog;
-class TransactionsInfoLog;
-class ProcessorsProfileLog;
-class FilesystemCacheLog;
-class FilesystemReadPrefetchesLog;
-class AsynchronousInsertLog;
-class BackupLog;
-class ObjectStorageQueueLog;
-class BlobStorageLog;
+/// NOLINTBEGIN(bugprone-macro-parentheses)
+#define FORWARD_DECLARATION(log_type, member, descr) \
+    class log_type; \
+
+LIST_OF_ALL_SYSTEM_LOGS(FORWARD_DECLARATION)
+#undef FORWARD_DECLARATION
+/// NOLINTEND(bugprone-macro-parentheses)
+
 /// System logs should be destroyed in destructor of the last Context and before tables,
 /// because SystemLog destruction makes insert query while flushing data into underlying tables
-struct SystemLogs
+class SystemLogs
 {
+public:
+    SystemLogs() = default;
    SystemLogs(ContextPtr global_context, const 
Poco::Util::AbstractConfiguration & config); - ~SystemLogs(); + SystemLogs(const SystemLogs & other) = default; + void flush(bool should_prepare_tables_anyway); + void flushAndShutdown(); void shutdown(); void handleCrash(); - std::shared_ptr query_log; /// Used to log queries. - std::shared_ptr query_thread_log; /// Used to log query threads. - std::shared_ptr part_log; /// Used to log operations with parts - std::shared_ptr trace_log; /// Used to log traces from query profiler - std::shared_ptr crash_log; /// Used to log server crashes. - std::shared_ptr text_log; /// Used to log all text messages. - std::shared_ptr metric_log; /// Used to log all metrics. - std::shared_ptr error_log; /// Used to log errors. - std::shared_ptr filesystem_cache_log; - std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; - std::shared_ptr azure_queue_log; - /// Metrics from system.asynchronous_metrics. - std::shared_ptr asynchronous_metric_log; - /// OpenTelemetry trace spans. - std::shared_ptr opentelemetry_span_log; - /// Used to log queries of materialized and live views - std::shared_ptr query_views_log; - /// Used to log all actions of ZooKeeper client - std::shared_ptr zookeeper_log; - /// Login, LogOut and Login failure events - std::shared_ptr session_log; - /// Events related to transactions - std::shared_ptr transactions_info_log; - /// Used to log processors profiling - std::shared_ptr processors_profile_log; - std::shared_ptr asynchronous_insert_log; - /// Backup and restore events - std::shared_ptr backup_log; - /// Log blob storage operations - std::shared_ptr blob_storage_log; +#define DECLARE_PUBLIC_MEMBERS(log_type, member, descr) \ + std::shared_ptr member; \ - std::vector logs; + LIST_OF_ALL_SYSTEM_LOGS(DECLARE_PUBLIC_MEMBERS) +#undef DECLARE_PUBLIC_MEMBERS + +private: + std::vector getAllLogs() const; }; struct SystemLogSettings diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b872eb94fde..6483dd3be48 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -184,7 +184,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) const auto & value = group_exprs[i]->as()->value; if (value.getType() == Field::Types::UInt64) { - auto pos = value.get(); + auto pos = value.safeGet(); if (pos > 0 && pos <= select_query->select()->children.size()) keep_position = true; } diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 31a881001e3..b1e12ff8048 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -94,8 +94,8 @@ void WindowFrame::checkValid() const if (begin_type == BoundaryType::Offset && !((begin_offset.getType() == Field::Types::UInt64 || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) + && begin_offset.safeGet() >= 0 + && begin_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", @@ -107,8 +107,8 @@ void WindowFrame::checkValid() const if (end_type == BoundaryType::Offset && !((end_offset.getType() == Field::Types::UInt64 || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) + && end_offset.safeGet() >= 0 + && end_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame end offset for '{}' frame must be a nonnegative 
32-bit integer, '{}' of type '{}' given", diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index bee4b494958..738c51baa64 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -57,7 +57,7 @@ template Field convertNumericTypeImpl(const Field & from) { To result; - if (!accurate::convertNumeric(from.get(), result)) + if (!accurate::convertNumeric(from.safeGet(), result)) return {}; return result; } @@ -88,7 +88,7 @@ Field convertNumericType(const Field & from, const IDataType & type) template Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -100,7 +100,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ template Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & type) { - const String & str_value = from.get(); + const String & str_value = from.safeGet(); T value = type.parseFromString(str_value); return DecimalField(value, type.getScale()); } @@ -108,7 +108,7 @@ Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & template Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & type) { - auto field = from.get>(); + auto field = from.safeGet>(); T value = convertDecimals, DataTypeDecimal>(field.getValue(), field.getScale(), type.getScale()); return DecimalField(value, type.getScale()); } @@ -116,7 +116,7 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & template Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -182,24 +182,24 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Conversion between Date and DateTime and vice versa. 
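The branches that follow convert by mapping through the time zone's day-number arithmetic (DateLUT's toDayNum and fromDayNum). A deliberately simplified sketch of that arithmetic, assuming fixed 86400-second days in UTC; the real DateLUT applies full time-zone rules, so treat this only as the shape of the conversion:

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-ins: Date is days since epoch (UInt16 in ClickHouse),
// DateTime is seconds since epoch (UInt32). Fixed UTC days assumed here;
// DateLUT consults real time-zone data instead.
constexpr uint32_t SECONDS_PER_DAY = 86400;

static uint16_t toDayNum(uint32_t date_time)
{
    return static_cast<uint16_t>(date_time / SECONDS_PER_DAY);
}

static uint32_t fromDayNum(uint16_t day_num)
{
    return static_cast<uint32_t>(day_num) * SECONDS_PER_DAY;
}

int main()
{
    uint32_t date_time = 1'000'000'000;   // 2001-09-09 01:46:40 UTC
    uint16_t day = toDayNum(date_time);   // 11574 days since 1970-01-01
    std::cout << day << '\n';
    std::cout << fromDayNum(day) << '\n'; // midnight of that day: 999993600
}
```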
if (which_type.isDate() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDate32() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDateTime() && which_from_type.isDate()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime() && which_from_type.isDate32()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime64() && which_from_type.isDate()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.get())); + const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.safeGet())); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); @@ -207,7 +207,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (which_type.isDateTime64() && which_from_type.isDate32()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.get()))); + const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.safeGet()))); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); @@ -257,7 +257,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { - const auto & from_type = src.get(); + const auto & from_type = src.safeGet(); const auto & to_type = static_cast(type); const auto scale_from = from_type.getScale(); @@ -304,8 +304,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isIPv4() && src.getType() == Field::Types::UInt64) { - /// convert to UInt32 which is the underlying type for native IPv4 - return convertNumericType(src, type); + /// convert through UInt32 which is the underlying type for native IPv4 + return static_cast(convertNumericType(src, type).safeGet()); } } else if (which_type.isUUID() && src.getType() == Field::Types::UUID) @@ -322,7 +322,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_from_type.isFixedString() && assert_cast(from_type_hint)->getN() == IPV6_BINARY_LENGTH) { const auto col = type.createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); type.getDefaultSerialization()->deserializeBinary(*col, in_buffer, {}); return (*col)[0]; } @@ -334,7 +334,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isFixedString()) { size_t n = assert_cast(type).getN(); - const auto & src_str = src.get(); + const auto & src_str = src.safeGet(); if (src_str.size() < n) { String src_str_extended = 
src_str; @@ -351,7 +351,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Array) { - const Array & src_arr = src.get(); + const Array & src_arr = src.safeGet(); size_t src_arr_size = src_arr.size(); const auto & element_type = *(type_array->getNestedType()); @@ -374,7 +374,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Tuple) { - const auto & src_tuple = src.get(); + const auto & src_tuple = src.safeGet(); size_t src_tuple_size = src_tuple.size(); size_t dst_tuple_size = type_tuple->getElements().size(); @@ -419,7 +419,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto & key_type = *type_map->getKeyType(); const auto & value_type = *type_map->getValueType(); - const auto & map = src.get(); + const auto & map = src.safeGet(); size_t map_size = map.size(); Map res(map_size); @@ -428,7 +428,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID for (size_t i = 0; i < map_size; ++i) { - const auto & map_entry = map[i].get(); + const auto & map_entry = map[i].safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -457,7 +457,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID "Cannot convert {} to {}", src.getTypeName(), agg_func_type->getName()); - const auto & name = src.get().name; + const auto & name = src.safeGet().name; if (agg_func_type->getName() != name) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName()); @@ -472,7 +472,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames()) { const auto & names = from_type_tuple->getElementNames(); - const auto & tuple = src.get(); + const auto & tuple = src.safeGet(); if (names.size() != tuple.size()) throw Exception(ErrorCodes::TYPE_MISMATCH, @@ -489,10 +489,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Map) { Object object; - const auto & map = src.get(); + const auto & map = src.safeGet(); for (const auto & element : map) { - const auto & map_entry = element.get(); + const auto & map_entry = element.safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -500,7 +500,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert from Map with key of type {} to Object", key.getTypeName()); - object[key.get()] = value; + object[key.safeGet()] = value; } return object; @@ -541,7 +541,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } const auto col = type_to_parse->createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); try { type_to_parse->getDefaultSerialization()->deserializeWholeText(*col, in_buffer, FormatSettings{}); @@ -549,9 +549,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID catch (Exception & e) { if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get(), type.getName()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.safeGet(), 
type.getName()); - e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); + e.addMessage(fmt::format("while converting '{}' to {}", src.safeGet(), type.getName())); throw; } @@ -614,7 +614,7 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t template static bool decimalEqualsFloat(Field field, Float64 float_value) { - auto decimal_field = field.get>(); + auto decimal_field = field.safeGet>(); auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); return decimal_to_float == float_value; } @@ -633,13 +633,13 @@ std::optional convertFieldToTypeStrict(const Field & from_value, const ID { /// Convert back to Float64 and compare if (result_value.getType() == Field::Types::Decimal32) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal64) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal128) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal256) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4bfc80af1fe..d4bb0cc2f8a 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -297,7 +297,7 @@ namespace { if (tuple_literal->value.getType() == Field::Types::Tuple) { - const auto & tuple = tuple_literal->value.get(); + const auto & tuple = tuple_literal->value.safeGet(); for (const auto & child : tuple) { const auto dnf = analyzeEquals(identifier, child, expr); @@ -792,7 +792,7 @@ std::optional evaluateExpressionOverConstantCondition(const ASTPtr & nod else if (const auto * literal = node->as()) { // Check if it's always true or false. 
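Since call sites like the one below first test getType() and only then extract the value, it is worth spelling out what the mechanical get() to safeGet() change running through this whole diff buys: safeGet() performs that type check inside the accessor itself, so a type confusion surfaces as an exception instead of undefined behaviour. A toy Field built on std::variant; the real DB::Field is a hand-rolled tagged union, so this is only a sketch of the contract difference:

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>

// Toy Field: std::variant is enough to show the get()/safeGet() contrast.
struct Field
{
    std::variant<uint64_t, int64_t, std::string> data;

    // get(): the caller asserts the type; a mismatch is undefined behaviour
    // in the real Field (modelled here by dereferencing a null pointer).
    template <typename T>
    const T & get() const { return *std::get_if<T>(&data); }

    // safeGet(): checks the stored alternative and throws on mismatch.
    template <typename T>
    const T & safeGet() const
    {
        if (const T * value = std::get_if<T>(&data))
            return *value;
        throw std::runtime_error("Field type mismatch");
    }
};

int main()
{
    Field f{.data = std::string{"not a number"}};
    try
    {
        std::cout << f.safeGet<uint64_t>() << '\n'; // throws instead of UB
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}
```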
- if (literal->value.getType() == Field::Types::UInt64 && literal->value.get() == 0) + if (literal->value.getType() == Field::Types::UInt64 && literal->value.safeGet() == 0) return {result}; else return {}; diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index 3d60723a167..ee967f45c74 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -35,11 +35,11 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel if (which == Field::Types::UInt64) { - pos = ast_literal->value.get(); + pos = ast_literal->value.safeGet(); } else if (which == Field::Types::Int64) { - auto value = ast_literal->value.get(); + auto value = ast_literal->value.safeGet(); if (value > 0) pos = value; else diff --git a/src/Interpreters/tests/gtest_comparison_graph.cpp b/src/Interpreters/tests/gtest_comparison_graph.cpp index ac24a8de368..5f93bb983c1 100644 --- a/src/Interpreters/tests/gtest_comparison_graph.cpp +++ b/src/Interpreters/tests/gtest_comparison_graph.cpp @@ -29,7 +29,7 @@ TEST(ComparisonGraph, Bounds) const auto & [lower, strict] = *res; - ASSERT_EQ(lower.get(), 3); + ASSERT_EQ(lower.safeGet(), 3); ASSERT_TRUE(strict); } @@ -39,7 +39,7 @@ TEST(ComparisonGraph, Bounds) const auto & [upper, strict] = *res; - ASSERT_EQ(upper.get(), 7); + ASSERT_EQ(upper.safeGet(), 7); ASSERT_TRUE(strict); } diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index cd9e910d45a..d42728addb7 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -522,7 +522,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (tuple_arguments_valid && lit_right) { if (isInt64OrUInt64FieldType(lit_right->value.getType()) - && lit_right->value.get() >= 0) + && lit_right->value.safeGet() >= 0) { if (frame.need_parens) settings.ostr << '('; diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 8dedc5dc95d..515f4f0cb9f 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -73,8 +73,8 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing) - || (type == Field::Types::Tuple && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing) + || (type == Field::Types::Tuple && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); @@ -92,7 +92,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// for tons of literals as it creates temporary String. if (value.getType() == Field::Types::String) { - writeQuoted(value.get(), ostr); + writeQuoted(value.safeGet(), ostr); } else { @@ -110,7 +110,7 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const /// Special case for very large arrays. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) 
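The comment above describes a size gate: small literals are spelled out in the derived column name, while large arrays are replaced by a hash of their contents so expression analysis stays fast and names stay short. A self-contained sketch of that naming scheme; FNV-1a stands in for the SipHash used by the real code, and the array-of-int shape is purely illustrative:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// FNV-1a, a stand-in for SipHash.
static uint64_t fnv1a(const std::string & s)
{
    uint64_t h = 1469598103934665603ULL;
    for (unsigned char c : s)
    {
        h ^= c;
        h *= 1099511628211ULL;
    }
    return h;
}

// Small arrays are spelled out; big ones become a stable hash-based name.
static std::string columnNameForArray(const std::vector<int> & values)
{
    constexpr size_t min_elements_for_hashing = 100; // same idea as the source

    if (values.size() <= min_elements_for_hashing)
    {
        std::string name = "[";
        for (size_t i = 0; i < values.size(); ++i)
            name += (i ? "," : "") + std::to_string(values[i]);
        return name + "]";
    }

    std::string blob;
    for (int v : values)
        blob += std::to_string(v) + ',';
    return "__array_" + std::to_string(fnv1a(blob));
}

int main()
{
    std::cout << columnNameForArray({1, 2, 3}) << '\n';                 // [1,2,3]
    std::cout << columnNameForArray(std::vector<int>(1000, 7)) << '\n'; // __array_<hash>
}
```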
auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index f60fa7e4a23..eac88c75513 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -97,24 +97,9 @@ namespace void formatCurrentGrantsElements(const AccessRightsElements & elements, const IAST::FormatSettings & settings) { - for (size_t i = 0; i != elements.size(); ++i) - { - const auto & element = elements[i]; - - bool next_element_on_same_db_and_table = false; - if (i != elements.size() - 1) - { - const auto & next_element = elements[i + 1]; - if (element.sameDatabaseAndTableAndParameter(next_element)) - next_element_on_same_db_and_table = true; - } - - if (!next_element_on_same_db_and_table) - { - settings.ostr << " "; - formatONClause(element, settings); - } - } + settings.ostr << "("; + formatElementsWithoutOptions(elements, settings); + settings.ostr << ")"; } } diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index ddfdbe38903..ddf4e9ecda5 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -114,7 +114,7 @@ namespace T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) - return parseWithSizeSuffix(boost::algorithm::trim_copy(f.get())); + return parseWithSizeSuffix(boost::algorithm::trim_copy(f.safeGet())); else return applyVisitor(FieldVisitorConvertToNumber(), f); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9927acdcf17..de395d120d7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -314,7 +314,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (parts.size() == 1) node = std::make_shared(parts[0], std::move(params)); @@ -1626,7 +1626,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!parser_string_literal.parse(pos, ast_prefix_name, expected)) return false; - column_name_prefix = ast_prefix_name->as().value.get(); + column_name_prefix = ast_prefix_name->as().value.safeGet(); } if (with_open_round_bracket) @@ -1689,7 +1689,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto res = std::make_shared(); if (regexp_node) - res->setPattern(regexp_node->as().value.get()); + res->setPattern(regexp_node->as().value.safeGet()); else res->children = std::move(identifiers); res->is_strict = is_strict; @@ -1861,7 +1861,7 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected else { auto regexp_matcher = std::make_shared(); - regexp_matcher->setPattern(regexp_node->as().value.get()); + regexp_matcher->setPattern(regexp_node->as().value.safeGet()); if (!transformers->children.empty()) { @@ -2310,7 +2310,7 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - destination_name = ast_space_name->as().value.get(); + 
destination_name = ast_space_name->as().value.safeGet(); } else if (mode == TTLMode::GROUP_BY) { diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index dbefb0cb966..73fd563faf6 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -517,7 +517,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } else if (s_move_partition.ignore(pos, expected)) { @@ -545,7 +545,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } } else if (s_add_constraint.ignore(pos, expected)) @@ -638,7 +638,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->type = ASTAlterCommand::FETCH_PARTITION; } else if (s_fetch_part.ignore(pos, expected)) @@ -652,7 +652,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr ast_from; if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->part = true; command->type = ASTAlterCommand::FETCH_PARTITION; } @@ -680,7 +680,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } } else if (s_unfreeze.ignore(pos, expected)) @@ -707,7 +707,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } else { diff --git a/src/Parsers/ParserCheckQuery.cpp b/src/Parsers/ParserCheckQuery.cpp index 42716ba7f2c..33b6a5a1ac2 100644 --- a/src/Parsers/ParserCheckQuery.cpp +++ b/src/Parsers/ParserCheckQuery.cpp @@ -55,7 +55,7 @@ bool ParserCheckQuery::parseCheckTable(Pos & pos, ASTPtr & node, Expected & expe const auto * ast_literal = ast_part_name->as(); if (!ast_literal || ast_literal->value.getType() != Field::Types::String) return false; - query->part_name = ast_literal->value.get(); + query->part_name = ast_literal->value.safeGet(); } if (query->database) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 66965903ab0..318fe9da1b4 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -931,7 +931,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->is_create_empty = is_create_empty; if (from_path) - query->attach_from_path = from_path->as().value.get(); + query->attach_from_path = from_path->as().value.safeGet(); return true; } @@ -1440,7 +1440,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, 
Expected & e ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (s_on.ignore(pos, expected)) diff --git a/src/Parsers/ParserDictionary.cpp b/src/Parsers/ParserDictionary.cpp index 83a006231d9..ce38d1b54d1 100644 --- a/src/Parsers/ParserDictionary.cpp +++ b/src/Parsers/ParserDictionary.cpp @@ -33,7 +33,7 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (literal.value.getType() != Field::Types::UInt64) return false; - res->max_sec = literal.value.get(); + res->max_sec = literal.value.safeGet(); node = res; return true; } @@ -58,10 +58,10 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; if (pair.first == "min") - res->min_sec = literal->value.get(); + res->min_sec = literal->value.safeGet(); else if (pair.first == "max") { - res->max_sec = literal->value.get(); + res->max_sec = literal->value.safeGet(); initialized_max = true; } else diff --git a/src/Parsers/ParserPartition.cpp b/src/Parsers/ParserPartition.cpp index 80a28f4803e..ab97b3d0e3b 100644 --- a/src/Parsers/ParserPartition.cpp +++ b/src/Parsers/ParserPartition.cpp @@ -65,7 +65,7 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (literal_ast->value.getType() == Field::Types::Tuple) { - fields_count = literal_ast->value.get().size(); + fields_count = literal_ast->value.safeGet().size(); } else { diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 0545c3e5568..81b64ab47c6 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -445,7 +445,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - String time_str = ast->as().value.get(); + String time_str = ast->as().value.safeGet(); ReadBufferFromString buf(time_str); time_t time; readDateTimeText(time, buf); @@ -467,7 +467,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; } - res->seconds = seconds->as()->value.get(); + res->seconds = seconds->as()->value.safeGet(); break; } case Type::DROP_FILESYSTEM_CACHE: @@ -538,7 +538,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (ParserKeyword{Keyword::WITH_NAME}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected)) { - res->backup_name = ast->as().value.get(); + res->backup_name = ast->as().value.safeGet(); } else { @@ -577,7 +577,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - custom_name = ast->as().value.get(); + custom_name = ast->as().value.safeGet(); } return true; diff --git a/src/Parsers/ParserUndropQuery.cpp b/src/Parsers/ParserUndropQuery.cpp index 07ca8a3b5fd..57da47df70d 100644 --- a/src/Parsers/ParserUndropQuery.cpp +++ b/src/Parsers/ParserUndropQuery.cpp @@ -41,7 +41,7 @@ bool parseUndropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (ParserKeyword{Keyword::ON}.ignore(pos, expected)) { diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp 
b/src/Parsers/tests/gtest_dictionary_parser.cpp index a1ba46125a7..af3591750a1 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -56,21 +56,21 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(create->dictionary->source->name, "clickhouse"); auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as() -> first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "password"); - EXPECT_EQ(children[3]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[3]->as()->second->as()->value.safeGet(), ""); EXPECT_EQ(children[4]->as()->first, "db"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), "test"); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), "test"); EXPECT_EQ(children[5]->as()->first, "table"); - EXPECT_EQ(children[5]->as()->second->as()->value.get(), "table_for_dict"); + EXPECT_EQ(children[5]->as()->second->as()->value.safeGet(), "table_for_dict"); /// layout test auto * layout = create->dictionary->layout; @@ -102,9 +102,9 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 0); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 0); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -150,8 +150,8 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); EXPECT_EQ(attributes_children[0]->as()->default_value, nullptr); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -195,9 +195,9 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 100); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + 
EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 100); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -248,25 +248,25 @@ TEST(ParserDictionaryDDL, NestedSource) auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as()->first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "replica"); auto replica = children[3]->as()->second->children; EXPECT_EQ(replica[0]->as()->first, "host"); - EXPECT_EQ(replica[0]->as()->second->as()->value.get(), "127.0.0.1"); + EXPECT_EQ(replica[0]->as()->second->as()->value.safeGet(), "127.0.0.1"); EXPECT_EQ(replica[1]->as()->first, "priority"); - EXPECT_EQ(replica[1]->as()->second->as()->value.get(), 1); + EXPECT_EQ(replica[1]->as()->second->as()->value.safeGet(), 1); EXPECT_EQ(children[4]->as()->first, "password"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), ""); } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 58bf4c1a2fc..30301b242db 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -185,7 +185,7 @@ namespace DB } else { - auto value = static_cast(column[value_i].get>().getValue()); + auto value = static_cast(column[value_i].safeGet>().getValue()); if (need_rescale) { if (common::mulOverflow(value, rescale_multiplier, value)) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 06e8668cd7c..566a036d79c 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -208,20 +208,20 @@ private: /// Do not replace empty array and array of NULLs if (literal->value.getType() == Field::Types::Array) { - const Array & array = literal->value.get(); + const Array & array = literal->value.safeGet(); auto not_null = std::find_if_not(array.begin(), array.end(), [](const auto & elem) { return elem.isNull(); }); if (not_null == array.end()) return true; } else if (literal->value.getType() == Field::Types::Map) { - const Map & map = literal->value.get(); + const Map & map = literal->value.safeGet(); if (map.size() % 2) return false; } else if (literal->value.getType() == Field::Types::Tuple) { - const Tuple & tuple = literal->value.get(); + const Tuple & tuple = literal->value.safeGet(); for (const auto & value : tuple) if (value.isNull()) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 649721f28bf..58bec8120f1 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp 
+++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -262,7 +262,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy { case orc::BOOLEAN: { /// May throw exception - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val != 0); } case orc::BYTE: @@ -275,7 +275,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy /// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10 /// We have to reject this, otherwise it would miss values > 127 (because /// they're treated as negative by ORC). - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val); } case orc::FLOAT: diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index 3578401a0f8..b43c195f201 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -286,10 +286,10 @@ static void columnMapToContainer(const ColumnMap * col_map, size_t row_num, Cont { Field field; col_map->get(row_num, field); - const auto & map_field = field.get(); + const auto & map_field = field.safeGet(); for (const auto & map_element : map_field) { - const auto & map_entry = map_element.get(); + const auto & map_entry = map_element.safeGet(); String entry_key; String entry_value; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index de34a8aa04f..9839f64b947 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -333,7 +333,7 @@ namespace { const DataTypeTuple & type_tuple = static_cast(data_type); - Tuple & tuple_value = value.get(); + Tuple & tuple_value = value.safeGet(); size_t src_tuple_size = tuple_value.size(); size_t dst_tuple_size = type_tuple.getElements().size(); @@ -360,7 +360,7 @@ namespace if (element_type.isNullable()) return; - Array & array_value = value.get(); + Array & array_value = value.safeGet(); size_t array_value_size = array_value.size(); for (size_t i = 0; i < array_value_size; ++i) @@ -378,12 +378,12 @@ namespace const auto & key_type = *type_map.getKeyType(); const auto & value_type = *type_map.getValueType(); - auto & map = value.get(); + auto & map = value.safeGet(); size_t map_size = map.size(); for (size_t i = 0; i < map_size; ++i) { - auto & map_entry = map[i].get(); + auto & map_entry = map[i].safeGet(); auto & entry_key = map_entry[0]; auto & entry_value = map_entry[1]; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index e2c6371c44f..80c00f91d82 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -127,14 +127,14 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row right(left.size()); for (size_t col_num : desc.key_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); for (size_t col_num : desc.val_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & { - return matrix[i].get()[j]; + return 
matrix[i].safeGet()[j]; }; auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array @@ -160,7 +160,7 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, auto merge = [&](const Row & matrix) { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); + size_t rows = matrix[desc.key_col_nums[0]].safeGet().size(); for (size_t j = 0; j < rows; ++j) { @@ -190,10 +190,10 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, for (const auto & key_value : merged) { for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + row[desc.key_col_nums[col_num_index]].safeGet()[row_num] = key_value.first[col_num_index]; for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + row[desc.val_col_nums[col_num_index]].safeGet()[row_num] = key_value.second[col_num_index]; ++row_num; } diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 7cac7bee6ec..f0094f0f8d2 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -59,9 +60,10 @@ public: if (typeid_cast(current_step) || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + || typeid_cast(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (4) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (5) aggregation change order { logStep("nodes_affect_order/push", current_node); nodes_affect_order.push_back(current_node); diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 2080e29ceba..596d08845e1 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -119,23 +119,23 @@ using RangesWithStep = std::vector; std::optional steppedRangeFromRange(const Range & r, UInt64 step, UInt64 remainder) { - if ((r.right.get() == 0) && (!r.right_included)) + if ((r.right.safeGet() == 0) && (!r.right_included)) return std::nullopt; - UInt64 begin = (r.left.get() / step) * step; + UInt64 begin = (r.left.safeGet() / step) * step; if (begin > std::numeric_limits::max() - remainder) return std::nullopt; begin += remainder; - while ((r.left_included <= r.left.get()) && (begin <= r.left.get() - r.left_included)) + while ((r.left_included <= r.left.safeGet()) && (begin <= r.left.safeGet() - r.left_included)) { if (std::numeric_limits::max() - step < begin) return std::nullopt; begin += step; } - if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) + if ((begin >= r.right_included) && (begin - 
r.right_included >= r.right.safeGet<UInt64>()))
        return std::nullopt;
-    UInt64 right_edge_included = r.right.get<UInt64>() - (1 - r.right_included);
+    UInt64 right_edge_included = r.right.safeGet<UInt64>() - (1 - r.right_included);
     return std::optional{RangeWithStep{begin, step, static_cast(right_edge_included - begin) / step + 1}};
 }
diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp
index 5d533a7747e..52be9a6e84a 100644
--- a/src/Processors/Sources/MySQLSource.cpp
+++ b/src/Processors/Sources/MySQLSource.cpp
@@ -219,11 +219,11 @@ namespace
                         read_bytes_size += 8;
                         break;
                     case ValueType::vtEnum8:
-                        assert_cast<ColumnInt8 &>(column).insertValue(assert_cast<const DataTypeEnum<Int8> &>(data_type).castToValue(value.data()).get<Int8>());
+                        assert_cast<ColumnInt8 &>(column).insertValue(assert_cast<const DataTypeEnum<Int8> &>(data_type).castToValue(value.data()).safeGet<Int8>());
                         read_bytes_size += assert_cast<ColumnInt8 &>(column).byteSize();
                         break;
                     case ValueType::vtEnum16:
-                        assert_cast<ColumnInt16 &>(column).insertValue(assert_cast<const DataTypeEnum<Int16> &>(data_type).castToValue(value.data()).get<Int16>());
+                        assert_cast<ColumnInt16 &>(column).insertValue(assert_cast<const DataTypeEnum<Int16> &>(data_type).castToValue(value.data()).safeGet<Int16>());
                         read_bytes_size += assert_cast<ColumnInt16 &>(column).byteSize();
                         break;
                     case ValueType::vtString:
diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp
index 9601f821cc8..95f4a674ebb 100644
--- a/src/Processors/Transforms/FillingTransform.cpp
+++ b/src/Processors/Transforms/FillingTransform.cpp
@@ -62,7 +62,7 @@ static FillColumnDescription::StepFunction getStepFunction(
         case IntervalKind::Kind::NAME: \
             return [step, scale, &date_lut](Field & field) { \
                 field = Add##NAME##sImpl::execute(static_cast(\
-                    field.get()), static_cast(step), date_lut, utc_time_zone, scale); };
+                    field.safeGet()), static_cast(step), date_lut, utc_time_zone, scale); };
         FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
 #undef DECLARE_CASE
@@ -139,21 +139,21 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
     {
         if (which.isDate() || which.isDate32())
         {
-            Int64 avg_seconds = descr.fill_step.get<Int64>() * descr.step_kind->toAvgSeconds();
+            Int64 avg_seconds = descr.fill_step.safeGet<Int64>() * descr.step_kind->toAvgSeconds();
             if (std::abs(avg_seconds) < 86400)
                 throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
                                 "Value of step is too low ({} seconds). 
Must be >= 1 day", std::abs(avg_seconds)); } if (which.isDate()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (which.isDate32()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (const auto * date_time = checkAndGetDataType(type.get())) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), date_time->getTimeZone()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), date_time->getTimeZone()); else if (const auto * date_time64 = checkAndGetDataType(type.get())) { - const auto & step_dec = descr.fill_step.get &>(); + const auto & step_dec = descr.fill_step.safeGet &>(); Int64 step = DecimalUtils::convertTo(step_dec.getValue(), step_dec.getScale()); static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); @@ -163,7 +163,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & case IntervalKind::Kind::NAME: \ descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ - auto field_decimal = field.get>(); \ + auto field_decimal = field.safeGet>(); \ auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, utc_time_zone, field_decimal.getScale()); \ field = DecimalField(res, field_decimal.getScale()); \ }; \ diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index c27c230c741..85e6b2ec55e 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -85,7 +85,7 @@ static int compareValuesWithOffset(const IColumn * _compared_column, using ValueType = typename ColumnType::ValueType; // Note that the storage type of offset returned by get<> is different, so // we need to specify the type explicitly. - const ValueType offset = static_cast(_offset.get()); + const ValueType offset = static_cast(_offset.safeGet()); assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -140,7 +140,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); - const auto offset = _offset.get(); + const auto offset = _offset.safeGet(); chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -609,7 +609,7 @@ void WindowTransform::advanceFrameStartRowsOffset() { // Just recalculate it each time by walking blocks. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.begin_offset.get() + window_description.frame.begin_offset.safeGet() * (window_description.frame.begin_preceding ? -1 : 1)); frame_start = moved_row; @@ -848,7 +848,7 @@ void WindowTransform::advanceFrameEndRowsOffset() // Walk the specified offset from the current row. The "+1" is needed // because the frame_end is a past-the-end pointer. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.end_offset.get() + window_description.frame.end_offset.safeGet() * (window_description.frame.end_preceding ? 
-1 : 1) + 1); @@ -2105,13 +2105,13 @@ namespace throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of 'ntile' function must be a constant"); auto type_id = argument_types[0]->getTypeId(); if (type_id == TypeIndex::UInt8) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt16) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt32) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt64) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); if (!buckets) { @@ -2389,7 +2389,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset < 0) @@ -2475,7 +2475,7 @@ struct WindowFunctionNthValue final : public WindowFunction Int64 offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset <= 0) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index dfb388ffdb2..d5780e32db3 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -110,7 +110,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = typeid_cast(*ast_col_decl.comment); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); } if (ast_col_decl.codec) @@ -168,7 +168,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = ast_col_decl.comment->as(); - command.comment.emplace(ast_comment.value.get()); + command.comment.emplace(ast_comment.value.safeGet()); } if (ast_col_decl.ttl) @@ -211,7 +211,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.type = COMMENT_COLUMN; command.column_name = getIdentifierName(command_ast->column); const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); command.if_exists = command_ast->if_exists; return command; } @@ -221,7 +221,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ast = command_ast->clone(); command.type = COMMENT_TABLE; const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index da749812167..0d724245b49 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -197,7 +197,7 @@ void ColumnDescription::readText(ReadBuffer & buf) } if (col_ast->comment) - comment = col_ast->comment->as().value.get(); + comment = col_ast->comment->as().value.safeGet(); if (col_ast->codec) codec = 
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 089fb5c585c..ae2e8cffe28 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -445,7 +445,7 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp index 7354243732c..d6a8af3238e 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp @@ -268,7 +268,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Float64) { out.function = RPNElement::FUNCTION_FLOAT_LITERAL; - out.float_literal.emplace(const_value.get()); + out.float_literal.emplace(const_value.safeGet()); out.func_name = "Float literal"; return true; } @@ -276,7 +276,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::UInt64) { out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); + out.int_literal.emplace(const_value.safeGet()); out.func_name = "Int literal"; return true; } @@ -284,7 +284,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Int64) { out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); + out.int_literal.emplace(const_value.safeGet()); out.func_name = "Int literal"; return true; } @@ -292,7 +292,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Tuple) { out.function = RPNElement::FUNCTION_LITERAL_TUPLE; - out.tuple_literal = const_value.get(); + out.tuple_literal = const_value.safeGet(); out.func_name = "Tuple literal"; return true; } @@ -300,7 +300,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Array) { out.function = RPNElement::FUNCTION_LITERAL_ARRAY; - out.array_literal = const_value.get(); + out.array_literal = const_value.safeGet(); out.func_name = "Array literal"; return true; } @@ -308,7 +308,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::String) { out.function = RPNElement::FUNCTION_STRING_LITERAL; - out.func_name = const_value.get(); + out.func_name = const_value.safeGet(); return true; } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 93904c1a838..b6e4f0af696 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -428,7 +428,7 @@ std::pair IMergeTreeDataPart::getMinMaxDate() const if (storage.minmax_idx_date_column_pos != -1 && minmax_idx->initialized) { const auto & hyperrectangle = 
minmax_idx->hyperrectangle[storage.minmax_idx_date_column_pos]; - return {DayNum(hyperrectangle.left.get()), DayNum(hyperrectangle.right.get())}; + return {DayNum(hyperrectangle.left.safeGet()), DayNum(hyperrectangle.right.safeGet())}; } else return {}; @@ -444,15 +444,15 @@ std::pair IMergeTreeDataPart::getMinMaxTime() const if (hyperrectangle.left.getType() == Field::Types::UInt64) { assert(hyperrectangle.right.getType() == Field::Types::UInt64); - return {hyperrectangle.left.get(), hyperrectangle.right.get()}; + return {hyperrectangle.left.safeGet(), hyperrectangle.right.safeGet()}; } /// The case of DateTime64 else if (hyperrectangle.left.getType() == Field::Types::Decimal64) { assert(hyperrectangle.right.getType() == Field::Types::Decimal64); - auto left = hyperrectangle.left.get>(); - auto right = hyperrectangle.right.get>(); + auto left = hyperrectangle.left.safeGet>(); + auto right = hyperrectangle.right.safeGet>(); assert(left.getScale() == right.getScale()); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index aa7a498d5a3..2e57e172a7f 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -349,7 +349,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ false); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ false); if (prefix.empty()) return false; @@ -370,7 +370,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ true); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ true); if (prefix.empty()) return false; @@ -391,7 +391,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = value.get(); + String prefix = value.safeGet(); if (prefix.empty()) return false; @@ -412,7 +412,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - const String & expression = value.get(); + const String & expression = value.safeGet(); /// This optimization can't process alternation - this would require /// a comprehensive parsing of regular expression. @@ -2928,8 +2928,8 @@ BoolMask KeyCondition::checkInHyperrectangle( /// Let's support only the case of 2d, because I'm not confident in other cases. 
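The get -> safeGet replacements above and below all follow one pattern: Field::get() trusts the caller about which alternative the Field currently holds, while safeGet() checks the stored type tag first and throws on a mismatch. A minimal sketch of the distinction, using a simplified stand-in for DB::Field rather than the real class:

    // Simplified stand-in for DB::Field; the real class stores many more
    // alternatives and uses its own type-tag machinery.
    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <variant>

    struct Field
    {
        std::variant<uint64_t, int64_t, double, std::string> data;

        // get<T>(): no validation; calling it with the wrong T is undefined behaviour.
        template <typename T>
        T & get() { return *std::get_if<T>(&data); }

        // safeGet<T>(): validates the stored alternative and throws on mismatch,
        // turning a silent misread into a reportable error.
        template <typename T>
        T & safeGet()
        {
            if (auto * value = std::get_if<T>(&data))
                return *value;
            throw std::logic_error("Field holds a different type");
        }
    };

Call sites that already checked getType(), like the branches in these hunks, behave identically with safeGet(); the benefit is in the paths where such a check was missing.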
if (num_dimensions == 2) { - UInt64 left = key_range.left.get(); - UInt64 right = key_range.right.get(); + UInt64 left = key_range.left.safeGet(); + UInt64 right = key_range.right.safeGet(); BoolMask mask(false, true); auto hyperrectangle_intersection_callback = [&](std::array, 2> curve_hyperrectangle) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 625b1281c61..3e01294a6ab 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3517,7 +3517,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistics is not not safe " + "ALTER types of column {} with statistics is not safe " "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } @@ -5888,7 +5888,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc if (partition_lit && partition_lit->value.getType() == Field::Types::String) { MergeTreePartInfo::validatePartitionID(partition_ast.value->clone(), format_version); - return partition_lit->value.get(); + return partition_lit->value.safeGet(); } } @@ -5951,7 +5951,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", partition_key_value.getTypeName()); - const Tuple & tuple = partition_key_value.get(); + const Tuple & tuple = partition_key_value.safeGet(); if (tuple.size() != fields_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of fields in the partition expression: {}, must be: {}", tuple.size(), fields_count); @@ -6890,7 +6890,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - AggregateFunctionCount::set(place, value.get()); + AggregateFunctionCount::set(place, value.safeGet()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); @@ -7528,7 +7528,7 @@ MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & parti if (const auto * partition_lit = partition_ast->as().value->as()) { id = partition_lit->value.getType() == Field::Types::UInt64 - ? toString(partition_lit->value.get()) + ? 
toString(partition_lit->value.safeGet()) : partition_lit->value.safeGet(); prefixed = true; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cb02f1cf5f2..f29d715e791 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -451,8 +451,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.get()); - DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.get()); + DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.safeGet()); + DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.safeGet()); const auto & date_lut = DateLUT::serverTimezoneInstance(); diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 497e86334f3..b68e48eeb3a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -343,12 +343,12 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; String distance_function = DEFAULT_DISTANCE_FUNCTION; if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); + distance_function = index.arguments[0].safeGet(); static constexpr auto DEFAULT_TREES = 100uz; UInt64 trees = DEFAULT_TREES; if (index.arguments.size() > 1) - trees = index.arguments[1].get(); + trees = index.arguments[1].safeGet(); return std::make_shared(index, trees, distance_function); } @@ -375,7 +375,7 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { - String distance_name = index.arguments[0].get(); + String distance_name = index.arguments[0].safeGet(); if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index dc314ce53d4..b796ed7114e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -348,19 +348,19 @@ bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTre { if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? 
RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -703,7 +703,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( const bool is_nullable = actual_type->isNullable(); auto mutable_column = actual_type->createColumn(); - for (const auto & f : value_field.get()) + for (const auto & f : value_field.safeGet()) { if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; @@ -774,7 +774,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (which.isTuple() && key_node_function_name == "tuple") { - const Tuple & tuple = value_field.get(); + const Tuple & tuple = value_field.safeGet(); const auto * value_tuple_data_type = typeid_cast(value_type.get()); if (tuple.size() != key_node_function_arguments_size) @@ -963,7 +963,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach) { const auto & argument = index.arguments[0]; - if (!attach && (argument.getType() != Field::Types::Float64 || argument.get() < 0 || argument.get() > 1)) + if (!attach && (argument.getType() != Field::Types::Float64 || argument.safeGet() < 0 || argument.safeGet() > 1)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The BloomFilter false positive must be a double number between 0 and 1."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 5b6813d12e3..857b7903588 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -341,19 +341,19 @@ bool MergeTreeConditionBloomFilterText::extractAtomFromTree(const RPNBuilderTree if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? 
RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -493,7 +493,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - auto value = const_value.get(); + auto value = const_value.safeGet(); if (is_case_insensitive_scenario) std::ranges::transform(value, value.begin(), [](const auto & c) { return static_cast(std::tolower(c)); }); @@ -509,7 +509,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -519,7 +519,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -529,7 +529,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -538,7 +538,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -547,7 +547,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -556,7 +556,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -565,7 +565,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, true, false); return true; } @@ -574,7 +574,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, false, true); return true; 
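The stringToBloomFilter/substringToBloomFilter/stringLikeToBloomFilter calls above all reduce to extracting tokens from a constant and hashing them into a per-granule filter. A rough sketch of the n-gram path only, with a toy single-hash BloomFilter standing in for the real one built from the index arguments:

    #include <cstddef>
    #include <functional>
    #include <string_view>
    #include <vector>

    // Toy filter: one hash function and a fixed size; the real filter is
    // parameterised by the (size, hashes, seed) index arguments validated above.
    struct BloomFilter
    {
        std::vector<bool> bits = std::vector<bool>(4096);
        void add(std::string_view token) { bits[std::hash<std::string_view>{}(token) % bits.size()] = true; }
        bool maybeContains(std::string_view token) const { return bits[std::hash<std::string_view>{}(token) % bits.size()]; }
    };

    // Slide an n-character window over the constant, as an ngrambf-style
    // extractor would; every n-gram must hit for a granule to possibly match.
    void ngramsToBloomFilter(std::string_view value, size_t n, BloomFilter & bf)
    {
        for (size_t i = 0; n != 0 && i + n <= value.size(); ++i)
            bf.add(value.substr(i, n));
    }

The startsWith/endsWith variants differ only in whether a partial token at the first or last position is admitted, which appears to be what the trailing pair of bool arguments to substringToBloomFilter controls.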
} @@ -589,13 +589,13 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( /// 2d vector is not needed here but is used because already exists for FUNCTION_IN std::vector> bloom_filters; bloom_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; bloom_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); if (function_name == "multiSearchAny") { @@ -615,7 +615,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_MATCH; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -743,11 +743,11 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( { if (index.type == NgramTokenExtractor::getName()) { - size_t n = index.arguments[0].get(); + size_t n = index.arguments[0].safeGet(); BloomFilterParameters params( - index.arguments[1].get(), - index.arguments[2].get(), - index.arguments[3].get()); + index.arguments[1].safeGet(), + index.arguments[2].safeGet(), + index.arguments[3].safeGet()); auto tokenizer = std::make_unique(n); @@ -756,9 +756,9 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( else if (index.type == SplitTokenExtractor::getName()) { BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); auto tokenizer = std::make_unique(); @@ -815,9 +815,9 @@ void bloomFilterIndexTextValidator(const IndexDescription & index, bool /*attach /// Just validate BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index cd6af68ebcc..b5c6bb95d37 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -74,7 +74,7 @@ void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr, MergeTr for (auto & gin_filter : gin_filters) { size_serialization->deserializeBinary(field_rows, istr, {}); - size_t filter_size = field_rows.get(); + size_t filter_size = field_rows.safeGet(); gin_filter.getFilter().resize(filter_size); if (filter_size == 0) @@ -379,19 +379,19 @@ bool MergeTreeConditionFullText::traverseAtomAST(const RPNBuilderTreeNode & node /// Check constant like in KeyCondition if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.00 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.00 ? 
RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -530,7 +530,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -539,7 +539,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -549,7 +549,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -558,7 +558,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -567,7 +567,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -576,7 +576,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -585,7 +585,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -594,7 +594,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, true, false); return true; } @@ -603,7 +603,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, false, true); return true; } @@ -615,13 +615,13 @@ bool MergeTreeConditionFullText::traverseASTEquals( /// 2d 
vector is not needed here but is used because already exists for FUNCTION_IN std::vector gin_filters; gin_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; gin_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), gin_filters.back().back(), false, false); } out.set_gin_filters = std::move(gin_filters); @@ -632,7 +632,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_MATCH; - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -776,8 +776,8 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( MergeTreeIndexPtr fullTextIndexCreator( const IndexDescription & index) { - size_t n = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t n = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(n, max_rows); /// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor @@ -826,12 +826,12 @@ void fullTextIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.arguments[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INCORRECT_QUERY, "The second full text index argument must be UInt64"); - if (index.arguments[1].get() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].get() < MIN_ROWS_PER_POSTINGS_LIST) + if (index.arguments[1].safeGet() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].safeGet() < MIN_ROWS_PER_POSTINGS_LIST) throw Exception(ErrorCodes::INCORRECT_QUERY, "The maximum rows per postings list must be no less than {}", MIN_ROWS_PER_POSTINGS_LIST); } /// Just validate - size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? 
DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(ngrams, max_rows_per_postings_list); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index cd8065ecadf..abf3ae56376 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -37,7 +37,7 @@ void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr, Merge Field field_met; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_met, istr, {}); - met = field_met.get(); + met = field_met.safeGet(); is_empty = false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index a92df4ac72d..fa242fccbc1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -97,7 +97,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); - size_t rows_to_read = field_rows.get(); + size_t rows_to_read = field_rows.safeGet(); if (rows_to_read == 0) return; @@ -591,7 +591,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) { - size_t max_rows = index.arguments[0].get(); + size_t max_rows = index.arguments[0].safeGet(); return std::make_shared(index, max_rows); } diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index 59a4b0fbf9c..efd9bb754e1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -377,12 +377,12 @@ MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2; String distance_function = default_distance_function; if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); + distance_function = index.arguments[0].safeGet(); static constexpr auto default_scalar_kind = unum::usearch::scalar_kind_t::f16_k; auto scalar_kind = default_scalar_kind; if (index.arguments.size() > 1) - scalar_kind = nameToScalarKind.at(index.arguments[1].get()); + scalar_kind = nameToScalarKind.at(index.arguments[1].safeGet()); return std::make_shared(index, distance_function, scalar_kind); } @@ -408,14 +408,14 @@ void usearchIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { - String distance_name = index.arguments[0].get(); + String distance_name = index.arguments[0].safeGet(); if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) throw Exception(ErrorCodes::INCORRECT_DATA, "USearch index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } /// Check that a supported kind was passed as a second argument - if (index.arguments.size() > 1 && !nameToScalarKind.contains(index.arguments[1].get())) + if (index.arguments.size() > 1 && !nameToScalarKind.contains(index.arguments[1].safeGet())) { String supported_kinds; for (const auto & [name, kind] : nameToScalarKind) diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index b240f80ee13..5b5bc244f92 
100644
--- a/src/Storages/MergeTree/MergeTreePartition.cpp
+++ b/src/Storages/MergeTree/MergeTreePartition.cpp
@@ -241,7 +241,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const
         if (typeid_cast<const DataTypeDate *>(partition_key_sample.getByPosition(i).type.get()))
             result += toString(DateLUT::serverTimezoneInstance().toNumYYYYMMDD(DayNum(value[i].safeGet<UInt64>())));
         else if (typeid_cast<const DataTypeIPv4 *>(partition_key_sample.getByPosition(i).type.get()))
-            result += toString(value[i].get<IPv4>().toUnderType());
+            result += toString(value[i].safeGet<IPv4>().toUnderType());
         else
             result += applyVisitor(to_string_visitor, value[i]);
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 43c40dee77d..f0c26c302e1 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -222,17 +222,17 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet
     /// check the value with respect to threshold
     if (type == Field::Types::UInt64)
     {
-        const auto value = output_value.get<UInt64>();
+        const auto value = output_value.safeGet<UInt64>();
         return value > threshold;
     }
     else if (type == Field::Types::Int64)
     {
-        const auto value = output_value.get<Int64>();
+        const auto value = output_value.safeGet<Int64>();
         return value < -threshold || threshold < value;
     }
     else if (type == Field::Types::Float64)
     {
-        const auto value = output_value.get<Float64>();
+        const auto value = output_value.safeGet<Float64>();
         return value < -threshold || threshold < value;
     }
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index b7887c35590..44548e33d46 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -592,7 +592,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
             if (ast->value.getType() != Field::Types::String)
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg);
-            graphite_config_name = ast->value.get<String>();
+            graphite_config_name = ast->value.safeGet<String>();
         }
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg);
diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp
index 9136300ab3b..8190a375b97 100644
--- a/src/Storages/MessageQueueSink.cpp
+++ b/src/Storages/MessageQueueSink.cpp
@@ -47,6 +47,8 @@ void MessageQueueSink::consume(Chunk & chunk)
     if (columns.empty())
         return;
+    /// The formatter might hold pointers to the buffer (e.g. if PeekableWriteBuffer is used), so the formatter
+    /// needs to be reset after the buffer reallocates its memory; in this case, after the buffer is restarted.
     if (row_format)
     {
         size_t row = 0;
@@ -61,12 +63,12 @@ void MessageQueueSink::consume(Chunk & chunk)
                 row_format->writeRow(columns, row);
             }
             row_format->finalize();
-            row_format->resetFormatter();
             producer->produce(buffer->str(), i, columns, row - 1);
             /// Reallocate the buffer if its capacity is larger than DBMS_DEFAULT_BUFFER_SIZE,
             /// because most likely in this case we serialized an abnormally large row
             /// and won't need this large allocated buffer anymore.
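Before the reordered lines that follow, a compressed illustration of the hazard being fixed: the formatter may cache a pointer into the buffer's memory, and restarting the buffer can release that memory. The buffer and formatter below are simplified stand-ins, not the real WriteBuffer/IRowOutputFormat interfaces:

    #include <cstddef>
    #include <string>

    struct Buffer
    {
        std::string memory;
        // restart() may release the old allocation, invalidating any pointers into it.
        void restart(size_t reserve) { std::string().swap(memory); memory.reserve(reserve); }
    };

    struct Formatter
    {
        const char * cached_pos = nullptr;  // e.g. a PeekableWriteBuffer-style checkpoint
        void write(Buffer & buf) { buf.memory += "row\n"; cached_pos = buf.memory.data(); }
        void reset() { cached_pos = nullptr; }  // the safe point to drop cached pointers
    };

    void consumeOnce(Buffer & buf, Formatter & fmt)
    {
        fmt.write(buf);
        // producer->produce(buf.memory, ...);
        buf.restart(1024);  // cached_pos now dangles
        fmt.reset();        // so the reset has to come after the restart
    }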
buffer->restart(DBMS_DEFAULT_BUFFER_SIZE); + row_format->resetFormatter(); } } else @@ -74,12 +76,11 @@ void MessageQueueSink::consume(Chunk & chunk) format->write(getHeader().cloneWithColumns(chunk.detachColumns())); format->finalize(); producer->produce(buffer->str(), chunk.getNumRows(), columns, chunk.getNumRows() - 1); - format->resetFormatter(); buffer->restart(); + format->resetFormatter(); } } - void MessageQueueSink::onCancel() noexcept { try diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index c8603fccb86..087207d3860 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -91,8 +91,9 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->setPaths(metadata->getDataFiles()); + std::string sample_path; return Storage::resolveSchemaFromData( - object_storage_, configuration, format_settings_, local_context); + object_storage_, configuration, format_settings_, sample_path, local_context); } } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index df78f128c80..fadf683fce7 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -132,7 +132,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) format = format_name; } -String ReadBufferIterator::getLastFileName() const +String ReadBufferIterator::getLastFilePath() const { if (current_object_info) return current_object_info->getPath(); diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 6eeb52ec2ed..b81aebb7b07 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -33,7 +33,7 @@ public: void setResultingSchema(const ColumnsDescription & columns) override; - String getLastFileName() const override; + String getLastFilePath() const override; void setFormatName(const String & format_name) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index fe675a17d54..d9c82d68791 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -33,6 +34,33 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +{ + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. 
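The sample path obtained here matters because hive-style partition columns are recovered purely from the path text. A sketch of the kind of parsing use_hive_partitioning implies; the helper name and return shape are illustrative, not the actual VirtualColumnUtils API:

    #include <cstddef>
    #include <map>
    #include <string>

    // Collect "<name>=<value>" directory segments from an object path, e.g.
    // "data/date=2024-01-01/city=Amsterdam/file.parquet"
    //   -> {{"city", "Amsterdam"}, {"date", "2024-01-01"}}
    std::map<std::string, std::string> parseHivePartitions(const std::string & path)
    {
        std::map<std::string, std::string> result;
        size_t pos = 0;
        while (true)
        {
            size_t end = path.find('/', pos);
            if (end == std::string::npos)
                break;  // the final segment is the file name, not a partition
            std::string segment = path.substr(pos, end - pos);
            if (size_t eq = segment.find('='); eq != std::string::npos)
                result.emplace(segment.substr(0, eq), segment.substr(eq + 1));
            pos = end + 1;
        }
        return result;
    }

With an empty path (no files matched) there is nothing to infer, which is why getPathSample() disables throw_on_zero_files_match and returns "" instead of failing.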
+ query_settings.throw_on_zero_files_match = false; + + bool local_distributed_processing = distributed_processing; + if (context->getSettingsRef().use_hive_partitioning) + local_distributed_processing = false; + + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + query_settings, + object_storage, + local_distributed_processing, + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; +} + StorageObjectStorage::StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, @@ -53,7 +81,9 @@ StorageObjectStorage::StorageObjectStorage( , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); + + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context); configuration->check(context); StorageInMemoryMetadata metadata; @@ -61,7 +91,10 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning) + sample_path = getPathSample(metadata, context); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings)); setInMemoryMetadata(metadata); } @@ -198,7 +231,7 @@ private: return; auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, + configuration, configuration->getQuerySettings(context), object_storage, distributed_processing, context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); } }; @@ -350,6 +383,7 @@ std::unique_ptr StorageObjectStorage::createReadBufferIterat { auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, + configuration->getQuerySettings(context), object_storage, false/* distributed_processing */, context, @@ -366,33 +400,41 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + auto schema = readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + sample_path = iterator->getLastFilePath(); + return schema; } std::string StorageObjectStorage::resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema(format_settings, *iterator, context).second; + auto format_and_schema = 
detectFormatAndReadSchema(format_settings, *iterator, context).second; + sample_path = iterator->getLastFilePath(); + return format_and_schema; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + sample_path = iterator->getLastFilePath(); configuration->format = format; return std::pair(columns, format); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 818ce055c77..cae0db48f31 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -102,23 +102,28 @@ public: const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::string resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::pair resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); protected: virtual void updateConfiguration(ContextPtr local_context); + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + virtual ReadFromFormatInfo prepareReadingFromFormat( const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 78f568d8ae2..c214665f7e0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,6 +1,8 @@ #include "Storages/ObjectStorage/StorageObjectStorageCluster.h" #include +#include +#include #include #include #include @@ -19,6 +21,28 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String StorageObjectStorageCluster::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +{ + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. 
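The duplicated getPathSample() in the cluster storage relies on the signature change threaded through below: createFileIterator() no longer derives QuerySettings from the context internally, so call sites can adjust a copy before constructing the iterator. A schematic of that pattern, with types reduced to the relevant fields:

    #include <cstddef>

    struct QuerySettings
    {
        bool throw_on_zero_files_match = true;
        size_t list_object_keys_size = 1000;
    };

    struct FileIterator { explicit FileIterator(const QuerySettings &) {} };

    // After the change the caller owns the settings instead of the factory
    // deriving them from the context.
    FileIterator createFileIterator(const QuerySettings & query_settings)
    {
        return FileIterator(query_settings);
    }

    void pathSamplingCallSite(QuerySettings settings /* copied from the context */)
    {
        settings.throw_on_zero_files_match = false;  // sampling must not fail on empty globs
        auto iterator = createFileIterator(settings);
        (void) iterator;
    }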
+ query_settings.throw_on_zero_files_match = false; + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + query_settings, + object_storage, + false, // distributed_processing + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; +} + StorageObjectStorageCluster::StorageObjectStorageCluster( const String & cluster_name_, ConfigurationPtr configuration_, @@ -33,14 +57,18 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( , object_storage(object_storage_) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata metadata; metadata.setColumns(columns); metadata.setConstraints(constraints_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) + sample_path = getPathSample(metadata, context_); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path)); setInMemoryMetadata(metadata); } @@ -83,8 +111,8 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { auto iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, /* distributed_processing */false, local_context, - predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); + configuration, configuration->getQuerySettings(local_context), object_storage, /* distributed_processing */false, + local_context, predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 108aa109616..0088ff28fc2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -27,6 +27,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + private: void updateQueryToSendIfNeeded( ASTPtr & query, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 320799c7166..d8e26977e75 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -99,6 +99,7 @@ std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, @@ -116,7 +117,6 @@ std::shared_ptr StorageObjectStorageSourc throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside {} name", 
configuration->getNamespaceType()); - auto settings = configuration->getQuerySettings(local_context); const bool is_archive = configuration->isArchive(); std::unique_ptr iterator; @@ -125,8 +125,8 @@ std::shared_ptr StorageObjectStorageSourc /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, - local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, - settings.throw_on_zero_files_match, file_progress_callback); + local_context, is_archive ? nullptr : read_keys, query_settings.list_object_keys_size, + query_settings.throw_on_zero_files_match, file_progress_callback); } else { @@ -148,7 +148,7 @@ std::shared_ptr StorageObjectStorageSourc iterator = std::make_unique( object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, - settings.ignore_non_existent_file, file_progress_callback); + query_settings.ignore_non_existent_file, file_progress_callback); } if (is_archive) @@ -198,15 +198,17 @@ Chunk StorageObjectStorageSource::generate() const auto & object_info = reader.getObjectInfo(); const auto & filename = object_info->getFileName(); chassert(object_info->metadata); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - {.path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), - .size = object_info->isArchive() ? object_info->fileSizeInArchive() : object_info->metadata->size_bytes, - .filename = &filename, - .last_modified = object_info->metadata->last_modified, - .etag = &(object_info->metadata->etag) - }); + { + .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, + .filename = &filename, + .last_modified = object_info->metadata->last_modified, + .etag = &(object_info->metadata->etag) + }, getContext(), read_from_format_info.columns_description); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index e466621e1e1..6681dbf4578 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -52,6 +52,7 @@ public: static std::shared_ptr<IIterator> createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index e49e14d2a0c..73410d959e0 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -49,19 +49,20 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional<FormatSettings> format_settings, + std::string & sample_path, const ContextPtr & context) { if (columns.empty()) { if (format == "auto") std::tie(columns, format) = - StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, sample_path, context); else - columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, sample_path, context); } else if (format == "auto") { - format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, sample_path, context); } if (!columns.hasOnlyOrdinary()) diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 2077999df41..7ee14f50979 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -19,6 +19,7 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional<FormatSettings> format_settings, + std::string & sample_path, const ContextPtr & context); } diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 4d921003e04..2634a7b2f1e 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl() { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes - }); + }, getContext(), read_from_format_info.columns_description); return chunk; } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index f51a7a913b8..5dc3e01962c 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -161,14 +161,15 @@
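// Note (illustrative sketch, not part of the patch): the hunks around here are
// all the same plumbing. resolveSchemaAndFormat() gains a std::string &
// sample_path out-parameter that schema inference fills with the first file it
// actually read, so storage constructors can forward that path to
// getVirtualsForFileLikeStorage(). Caller-side pattern, assuming the signatures
// in this diff:
//
//     std::string sample_path;
//     resolveSchemaAndFormat(columns, format, object_storage, configuration,
//                            format_settings, sample_path, context);
//     // sample_path stays empty when columns were explicit and no file was
//     // opened for inference; callers may then fall back to getPathSample().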
StorageObjectStorageQueue::StorageObjectStorageQueue( configuration->check(context_); ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_)); setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index 0dc01f5fcf0..6dbd0625d3d 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -48,7 +48,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); if (isStringOrFixedString(data_type)) - return sketch.get_estimate(val.get()); + return sketch.get_estimate(val.safeGet()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); } diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 060b271d8f4..b95ccedb093 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -203,7 +203,7 @@ StoragePtr StorageFactory::get( } if (query.comment) - comment = query.comment->as().value.get(); + comment = query.comment->as().value.safeGet(); ASTs empty_engine_args; Arguments arguments{ diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 324c88d643d..766b7722cdf 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include "base/defines.h" #include @@ -516,7 +517,7 @@ namespace StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } - String getLastFileName() const override + String getLastFilePath() const override { if (current_index != 0) return paths[current_index - 1]; @@ -793,7 +794,7 @@ namespace format = format_name; } - String getLastFileName() const override + String getLastFilePath() const override { return last_read_file_path; } @@ -1112,7 +1113,8 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings)); } @@ -1466,7 +1468,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? 
&filename_override.value() : nullptr), .last_modified = current_file_last_modified - }); + }, getContext(), columns_description); return chunk; } @@ -2185,12 +2187,12 @@ void registerStorageFile(StorageFactory & factory) { auto type = literal->value.getType(); if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::String) StorageFile::parseFileSource( - literal->value.get(), + literal->value.safeGet(), source_path, storage_args.path_to_archive, factory_args.getLocalContext()->getSettingsRef().allow_archive_path_syntax); diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index d43e242f70c..82ae0b761ae 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -61,7 +61,7 @@ StorageFileCluster::StorageFileCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? "" : paths[0])); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 9950d41f1c2..fc73f246d35 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -419,7 +419,7 @@ void fuzzJSONObject( if (val.fixed->getType() == Field::Types::Which::String) { out << fuzzJSONStructure(config, rnd, "\""); - writeText(val.fixed->get(), out); + writeText(val.fixed->safeGet(), out); out << fuzzJSONStructure(config, rnd, "\""); } else diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index ff0b6882f50..efa15c382dd 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -383,10 +383,10 @@ void registerStorageJoin(StorageFactory & factory) else if (setting.name == "any_join_distinct_right_table_keys") old_any_join = setting.value; else if (setting.name == "disk") - disk_name = setting.value.get(); + disk_name = setting.value.safeGet(); else if (setting.name == "persistent") { - persistent = setting.value.get(); + persistent = setting.value.safeGet(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting {} for storage {}", setting.name, args.engine_name); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index d3214e7ed13..e0a4af68824 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -294,7 +294,7 @@ public: { const auto * array_type = typeid_cast(data_type.get()); const auto & nested = array_type->getNestedType(); - const auto & array = array_field.get(); + const auto & array = array_field.safeGet(); if (!isArray(nested)) { @@ -312,7 +312,7 @@ public: if (!isArray(nested_array_type->getNestedType())) { - parseArrayContent(iter->get(), nested, ostr); + parseArrayContent(iter->safeGet(), nested, ostr); } else { diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 76c49c50f76..6442891cf23 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,8 @@ #include #include #include + +#include 
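// Note (illustrative sketch, not part of the patch): for the File engine and
// its Cluster variant, the "sample path" handed to
// getVirtualsForFileLikeStorage() is deliberately cheap, just the first
// resolved path, on the assumption that all files share one partition layout:
//
//     setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(
//         storage_metadata.getColumns(), context,
//         paths.empty() ? "" : paths[0]));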
#include #include #include @@ -90,11 +92,22 @@ static const std::vector> optional_regex_keys = { std::make_shared(R"(headers.header\[[0-9]*\].value)"), }; -static bool urlWithGlobs(const String & uri) +bool urlWithGlobs(const String & uri) { return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; } +String getSampleURI(String uri, ContextPtr context) +{ + if (urlWithGlobs(uri)) + { + auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); + if (!uris.empty()) + return uris[0]; + } + return uri; +} + static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); @@ -153,7 +166,8 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings)); } @@ -414,13 +428,14 @@ Chunk StorageURLSource::generate() size_t chunk_size = 0; if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, { .path = curr_uri.getPath(), - .size = current_file_size - }); + .size = current_file_size, + }, getContext(), columns_description); return chunk; } @@ -839,7 +854,7 @@ namespace format = format_name; } - String getLastFileName() const override { return current_url_option; } + String getLastFilePath() const override { return current_url_option; } bool supportsLastReadBufferRecreation() const override { return true; } @@ -1160,6 +1175,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); + const auto & settings = context->getSettingsRef(); if (is_empty_glob) { @@ -1170,7 +1186,6 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil Pipes pipes; pipes.reserve(num_streams); - const auto & settings = context->getSettingsRef(); const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 
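// Note (illustrative, not part of the patch): getSampleURI() above reuses
// parseRemoteDescription() so a globbed URL still yields one concrete candidate
// for hive-partition detection. Roughly, with a large enough
// glob_expansion_max_elements:
//
//     getSampleURI("https://host/data/{a,b}/file.csv", ctx);  // -> ".../data/a/file.csv"
//     getSampleURI("https://host/data/file.csv", ctx);        // -> unchanged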
1 : (settings.max_parsing_threads / num_streams); for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 9d060e9e11c..19daf843431 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -141,6 +141,9 @@ private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; }; +bool urlWithGlobs(const String & uri); + +String getSampleURI(String uri, ContextPtr context); class StorageURLSource : public SourceWithKeyCondition, WithContext { diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 69041482fd8..7c7a299c64e 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -76,7 +76,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context))); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index a81bcb08bfc..9e5adbfe825 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -21,7 +21,7 @@ const char * auto_config_build[] "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", - "USE_JEMALLOC", "@ENABLE_JEMALLOC@", + "USE_JEMALLOC", "@USE_JEMALLOC@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", @@ -36,7 +36,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "OPENSSL_VERSION", "@OPENSSL_VERSION@", "OPENSSL_IS_BORING_SSL", "@OPENSSL_IS_BORING_SSL@", - "USE_VECTORSCAN", "@ENABLE_VECTORSCAN@", + "USE_VECTORSCAN", "@USE_VECTORSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_ODBC", "@USE_ODBC@", "USE_GRPC", "@USE_GRPC@", @@ -62,8 +62,8 @@ const char * auto_config_build[] "USE_ARROW", "@USE_ARROW@", "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", - "USE_QPL", "@ENABLE_QPL@", - "USE_QAT", "@ENABLE_QATLIB@", + "USE_QPL", "@USE_QPL@", + "USE_QATLIB", "@USE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 03c569303c5..bc13cb77d5e 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -105,8 +105,8 @@ protected: while (rows_count < max_block_size && db_table_num < total_tables) { - const std::string database_name = (*databases)[db_table_num].get(); - const std::string table_name = (*tables)[db_table_num].get(); + const std::string database_name = (*databases)[db_table_num].safeGet(); + const std::string table_name = (*tables)[db_table_num].safeGet(); ++db_table_num; ColumnsDescription columns; @@ -437,7 +437,7 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, for (size_t i = 0; i < num_databases; ++i) { - const std::string database_name = (*database_column)[i].get(); + const std::string database_name = 
(*database_column)[i].safeGet(); if (database_name.empty()) { for (auto & [table_name, table] : external_tables) diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 86713632339..db6804d3ad7 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, - {"last_used", std::make_shared(6), "The last time this consumer was in use."}, + {"last_used", std::make_shared(), "The last time this consumer was in use, unix time in microseconds."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; } @@ -79,7 +79,7 @@ void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, Context auto & num_rebalance_revocations = assert_cast(*res_columns[index++]); auto & num_rebalance_assigments = assert_cast(*res_columns[index++]); auto & is_currently_used = assert_cast(*res_columns[index++]); - auto & last_used = assert_cast(*res_columns[index++]); + auto & last_used = assert_cast(*res_columns[index++]); auto & rdkafka_stat = assert_cast(*res_columns[index++]); const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 7ace8ee24aa..c87bdb6d26a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -138,7 +138,7 @@ StoragesInfoStream::StoragesInfoStream(std::optional filter_by_datab for (size_t i = 0; i < rows; ++i) { - String database_name = (*database_column_for_filter)[i].get(); + String database_name = (*database_column_for_filter)[i].safeGet(); const DatabasePtr database = databases.at(database_name); offsets[i] = i ? offsets[i - 1] : 0; diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 806af4a7bf8..3be73aeda17 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -52,13 +52,13 @@ public: { StoragesInfo info; - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - UUID storage_uuid = (*storage_uuid_column)[next_row].get(); + info.database = (*database_column)[next_row].safeGet(); + info.table = (*table_column)[next_row].safeGet(); + UUID storage_uuid = (*storage_uuid_column)[next_row].safeGet(); auto is_same_table = [&storage_uuid, this] (size_t row) -> bool { - return (*storage_uuid_column)[row].get() == storage_uuid; + return (*storage_uuid_column)[row].safeGet() == storage_uuid; }; /// We may have two rows per table which differ in 'active' value. @@ -66,7 +66,7 @@ public: /// must collect the inactive parts. Remember this fact in StoragesInfo. 
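/// Note (illustrative, not part of the patch): the get() -> safeGet() sweep in
/// these system-table hunks replaces an unchecked Field access with a checked
/// one. Roughly, under the Field API as used here:
///
///     Field f = UInt64(42);
///     auto a = f.get<String>();     /// unchecked in release builds; a
///                                   /// wrong-type read returns garbage
///     auto b = f.safeGet<String>(); /// throws an exception on type mismatch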
for (; next_row < rows && is_same_table(next_row); ++next_row) { - const auto active = (*active_column)[next_row].get(); + const auto active = (*active_column)[next_row].safeGet(); if (active == 0) info.need_inactive_parts = true; } diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 4c54d4ae16f..b3532ba40a7 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -1,6 +1,7 @@ #include "StorageSystemQueryCache.h" #include #include +#include #include #include #include @@ -15,6 +16,7 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() { {"query", std::make_shared(), "Query string."}, {"result_size", std::make_shared(), "Size of the query cache entry."}, + {"tag", std::make_shared(std::make_shared()), "Tag of the query cache entry."}, {"stale", std::make_shared(), "If the query cache entry is stale."}, {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, {"compressed", std::make_shared(), "If the query cache entry is compressed."}, @@ -51,11 +53,12 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[0]->insert(key.query_string); /// approximates the original query string res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result)); - res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now()); - res_columns[3]->insert(key.is_shared); - res_columns[4]->insert(key.is_compressed); - res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) + res_columns[2]->insert(key.tag); + res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now()); + res_columns[4]->insert(key.is_shared); + res_columns[5]->insert(key.is_compressed); + res_columns[6]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); + res_columns[7]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) } } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ba59b012f95..d932f5cc469 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,51 +1,66 @@ -#include - #include #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include -#include -#include #include +#include +#include +#include #include -#include -#include + #include +#include #include +#include #include #include -#include -#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include #include #include -#include #include +#include #include + +#include +#include +#include #include +#include +#include +#include +#include +#include "Functions/FunctionsLogical.h" +#include "Functions/IFunction.h" +#include "Functions/IFunctionAdaptors.h" +#include "Functions/indexHint.h" +#include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + namespace VirtualColumnUtils { @@ -114,7 +129,29 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time", "_etag"}; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) +std::unordered_map 
<String, String> parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) +{ + std::string pattern = "([^/]+)=([^/]+)/"; + re2::StringPiece input_piece(path); + + std::unordered_map<String, String> key_values; + std::string key, value; + std::unordered_set<String> used_keys; + while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) + { + if (used_keys.contains(key)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key); + used_keys.insert(key); + + auto col_name = "_" + key; + while (storage_columns.has(col_name)) + col_name = "_" + col_name; + key_values[col_name] = value; + } + return key_values; +} + +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_) { VirtualColumnsDescription desc; @@ -132,6 +169,22 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_time", makeNullable(std::make_shared<DataTypeDateTime>())); add_virtual("_etag", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())); + if (context->getSettingsRef().use_hive_partitioning) + { + auto map = parseHivePartitioningKeysAndValues(path, storage_columns); + auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); + for (auto & item : map) + { + auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); + if (type == nullptr) + type = std::make_shared<DataTypeString>(); + if (type->canBeInsideLowCardinality()) + add_virtual(item.first, std::make_shared<DataTypeLowCardinality>(type)); + else + add_virtual(item.first, type); + } + } + return desc; } @@ -191,8 +244,12 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values) + VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns) { + std::unordered_map<String, String> hive_map; + if (context->getSettingsRef().use_hive_partitioning) + hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns); + for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -226,6 +283,10 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else if (auto it = hive_map.find(virtual_column.getNameInStorage()); it != hive_map.end()) + { + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), convertFieldToType(Field(it->second), *virtual_column.type))->convertToFullColumnIfConst()); + } else if (virtual_column.name == "_etag") { if (virtual_values.etag) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 980614fd27c..23e16871798 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -5,6 +5,7 @@ #include #include #include +#include <Formats/FormatSettings.h> #include @@ -63,12 +64,16 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get<T>()); + res.insert((*data.column)[i].safeGet<T>()); return res; } NameSet getVirtualNamesForFileLikeStorage();
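/// Note (worked example, not part of the patch): for
///     path = "store/year=2024/country=NL/part-0.parquet"
/// the regex "([^/]+)=([^/]+)/" in parseHivePartitioningKeysAndValues() matches
/// twice, so
///     auto kv = parseHivePartitioningKeysAndValues(path, storage_columns);
///     /// kv == {{"_year", "2024"}, {"_country", "NL"}}
/// Keys keep gaining '_' prefixes while they collide with storage columns, and
/// a repeated key such as "a/year=1/year=2/f.csv" throws INCORRECT_DATA. Each
/// value string is then type-inferred via tryInferDataTypeByEscapingRule()
/// (falling back to String), which is how "_year" can surface as a numeric
/// virtual column.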
-VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); +VirtualColumnsDescription getVirtualsForFileLikeStorage( + const ColumnsDescription & storage_columns, + const ContextPtr & context, + const std::string & sample_path = "", + std::optional<FormatSettings> format_settings_ = std::nullopt); std::optional<ActionsDAG> createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); @@ -100,7 +105,7 @@ struct VirtualsForFileLikeStorage void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values); + VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns); } } diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 9d23f132759..1408e120bc5 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -102,16 +102,16 @@ ColumnsDescription getStructureOfRemoteTableInShard( { ColumnDescription column; - column.name = (*name)[i].get<String>(); + column.name = (*name)[i].safeGet<String>(); - String data_type_name = (*type)[i].get<String>(); + String data_type_name = (*type)[i].safeGet<String>(); column.type = data_type_factory.get(data_type_name); - String kind_name = (*default_kind)[i].get<String>(); + String kind_name = (*default_kind)[i].safeGet<String>(); if (!kind_name.empty()) { column.default_desc.kind = columnDefaultKindFromString(kind_name); - String expr_str = (*default_expr)[i].get<String>(); + String expr_str = (*default_expr)[i].safeGet<String>(); column.default_desc.expression = parseQuery( expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, settings.max_parser_depth, settings.max_parser_backtracks); @@ -207,8 +207,8 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( size_t size = name_col.size(); for (size_t i = 0; i < size; ++i) { - auto name = name_col[i].get<String>(); - auto type_name = type_col[i].get<String>(); + auto name = name_col[i].safeGet<String>(); + auto type_name = type_col[i].safeGet<String>(); auto storage_column = storage_columns.tryGetPhysical(name); if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 552b9fde986..69d24c879bd 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -83,7 +83,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a String argument for EXPLAIN kind, got '{}'", getName(), queryToString(kind_arg)); - ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.get<String>()); + ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.safeGet<String>()); auto explain_query = std::make_shared<ASTExplainQuery>(kind); const auto * settings_arg = function->arguments->children[1]->as<ASTLiteral>(); @@ -92,7 +92,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a serialized string settings argument, got '{}'", getName(), queryToString(function->arguments->children[1])); - const auto & settings_str = settings_arg->value.get<String>(); + const auto & settings_str = settings_arg->value.safeGet<String>(); if (!settings_str.empty()) { const Settings & settings = context->getSettingsRef(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index
af327cfe54e..5cd249f000d 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -48,7 +48,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { fd = static_cast( - (type == Field::Types::Int64) ? literal->value.get() : literal->value.get()); + (type == Field::Types::Int64) ? literal->value.safeGet() : literal->value.safeGet()); if (fd < 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative"); } diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp index 06a48f0e25f..27ed50fb711 100644 --- a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -76,9 +76,9 @@ void TableFunctionMergeTreeIndex::parseArguments(const ASTPtr & ast_function, Co "Table function '{}' expected bool flag for 'with_marks' argument", getName()); if (value.getType() == Field::Types::Bool) - with_marks = value.get(); + with_marks = value.safeGet(); else - with_marks = value.get(); + with_marks = value.safeGet(); } if (!params.empty()) diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 550d9cc799b..39392a4c44c 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -84,7 +84,8 @@ ColumnsDescription TableFunctionObjectStorage< context->checkAccess(getSourceAccessType()); ColumnsDescription columns; auto storage = getObjectStorage(context, !is_insert_query); - resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, sample_path, context); return columns; } else diff --git a/src/configure_config.cmake b/src/configure_config.cmake index d22bf674df4..5b24f79ef6e 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -135,6 +135,12 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() +if (TARGET ch_contrib::qpl) + set(USE_QPL 1) +endif() +if (TARGET ch_contrib::qatlib) + set(USE_QATLIB 1) +endif() if (TARGET ch_contrib::avrocpp) set(USE_AVRO 1) endif() @@ -161,8 +167,8 @@ endif() if (TARGET ch_contrib::ssh) set(USE_SSH 1) endif() -if (TARGET ch_contrib::fiu) - set(FIU_ENABLE 1) +if (TARGET ch_contrib::libfiu) + set(USE_LIBFIU 1) endif() if (TARGET ch_contrib::libarchive) set(USE_LIBARCHIVE 1) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 84718462ab5..f5dbef4f6db 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -69,9 +69,9 @@ def get_changed_tests_to_run(pr_info, repo_path): return [] for fpath in changed_files: - if "tests/integration/test_" in fpath: + if re.search(r"tests/integration/test_.*/test.*\.py", fpath) is not None: logging.info("File %s changed and seems like integration test", fpath) - result.add(fpath.split("/")[2]) + result.add("/".join(fpath.split("/")[2:])) return filter_existing_tests(result, repo_path) diff --git a/tests/config/config.d/transactions.xml b/tests/config/config.d/transactions.xml index 9948b1f1865..64e166b81b5 100644 --- a/tests/config/config.d/transactions.xml +++ b/tests/config/config.d/transactions.xml @@ -1,4 +1,4 @@ - + 42 @@ -18,4 +18,4 @@ 0.01 - + diff 
--git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..fca2126d824 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -162,9 +162,13 @@ "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", "test_storage_kafka/test.py::test_formats_errors", "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", + "test_storage_kafka/test.py::test_kafka_null_message", + + "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" + ] diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index d8bad180e1b..9797db7c498 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -21,14 +21,14 @@ def start_cluster(): def test_cluster(start_cluster): assert ( node1.query( - "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one)" + "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one) ORDER BY ALL" ) == "node1\nnode2\n" ) assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY dummy""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -48,7 +48,7 @@ def test_global_in(start_cluster): assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -63,7 +63,7 @@ def test_global_in(start_cluster): def test_skip_unavailable_replica(start_cluster, cluster): assert ( node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=1" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=1" ) == "node1\nnode2\n" ) @@ -81,5 +81,5 @@ def test_error_on_unavailable_replica(start_cluster, cluster): # so when skip_unavailable_shards=0 - any unavailable replica should lead to an error with pytest.raises(QueryRuntimeException): node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=0" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=0" ) diff --git a/tests/integration/test_distributed_type_object/configs/remote_servers.xml b/tests/integration/test_distributed_type_object/configs/remote_servers.xml index ebce4697529..68b420f36b4 100644 --- a/tests/integration/test_distributed_type_object/configs/remote_servers.xml +++ b/tests/integration/test_distributed_type_object/configs/remote_servers.xml @@ -1,4 +1,4 @@ - + @@ -15,4 +15,4 @@ - + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml index 
7a2141e6c7e..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml index b5c351d105b..fb9acc58ad6 100644 --- a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml @@ -1,4 +1,4 @@ - + 1000 @@ -27,4 +27,4 @@ - + diff --git a/tests/integration/test_server_reload/configs/default_passwd.xml b/tests/integration/test_server_reload/configs/default_passwd.xml index f79149e7e23..9d664cbf9c4 100644 --- a/tests/integration/test_server_reload/configs/default_passwd.xml +++ b/tests/integration/test_server_reload/configs/default_passwd.xml @@ -1,4 +1,4 @@ - + @@ -9,4 +9,4 @@ 123 - + diff --git a/tests/integration/test_server_reload/configs/overrides_from_zk.xml b/tests/integration/test_server_reload/configs/overrides_from_zk.xml index d420faa88a2..aa6105f6ebe 100644 --- a/tests/integration/test_server_reload/configs/overrides_from_zk.xml +++ b/tests/integration/test_server_reload/configs/overrides_from_zk.xml @@ -1,4 +1,4 @@ - + @@ -7,4 +7,4 @@ - + diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 6fbe7634642..fbdc7f29f98 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -5,6 +5,7 @@ import json import logging import os import io +import re import random import threading import time @@ -134,6 +135,7 @@ def test_create_table_connection_string(cluster): Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV') """, ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_conn_string") def test_create_table_account_string(cluster): @@ -143,6 +145,7 @@ def test_create_table_account_string(cluster): f"CREATE TABLE test_create_table_account_url (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f"'cont', 'test_create_connection_string', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')", ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_account_url") def test_simple_write_account_string(cluster): @@ -156,6 +159,7 @@ def test_simple_write_account_string(cluster): azure_query(node, "INSERT INTO test_simple_write VALUES (1, 'a')") print(get_azure_file_content("test_simple_write.csv", port)) assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write") def test_simple_write_connection_string(cluster): @@ -169,6 +173,7 @@ def test_simple_write_connection_string(cluster): 
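# Note (illustrative, not part of the patch): the one-line additions across this
# file follow a single cleanup convention: every test that creates a table now
# drops it before returning, so Azurite state cannot leak between tests, e.g.:
#
#   azure_query(node, "CREATE TABLE t (key UInt64) Engine = AzureBlobStorage(...)")
#   azure_query(node, "INSERT INTO t VALUES (1)")
#   assert azure_query(node, "SELECT count() FROM t") == "1\n"
#   azure_query(node, "DROP TABLE t")  # previously TRUNCATE, or missing entirely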
azure_query(node, "INSERT INTO test_simple_write_connection_string VALUES (1, 'a')") print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_connection_string") def test_simple_write_named_collection_1(cluster): @@ -184,7 +189,7 @@ def test_simple_write_named_collection_1(cluster): ) print(get_azure_file_content("test_simple_write_named.csv", port)) assert get_azure_file_content("test_simple_write_named.csv", port) == '1,"a"\n' - azure_query(node, "TRUNCATE TABLE test_simple_write_named_collection_1") + azure_query(node, "DROP TABLE test_simple_write_named_collection_1") def test_simple_write_named_collection_2(cluster): @@ -201,6 +206,7 @@ def test_simple_write_named_collection_2(cluster): ) print(get_azure_file_content("test_simple_write_named_2.csv", port)) assert get_azure_file_content("test_simple_write_named_2.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_named_collection_2") def test_partition_by(cluster): @@ -222,6 +228,7 @@ def test_partition_by(cluster): assert "1,2,3\n" == get_azure_file_content("test_3.csv", port) assert "3,2,1\n" == get_azure_file_content("test_1.csv", port) assert "78,43,45\n" == get_azure_file_content("test_45.csv", port) + azure_query(node, "DROP TABLE test_partitioned_write") def test_partition_by_string_column(cluster): @@ -242,6 +249,7 @@ def test_partition_by_string_column(cluster): assert '1,"foo/bar"\n' == get_azure_file_content("test_foo/bar.csv", port) assert '3,"йцук"\n' == get_azure_file_content("test_йцук.csv", port) assert '78,"你好"\n' == get_azure_file_content("test_你好.csv", port) + azure_query(node, "DROP TABLE test_partitioned_string_write") def test_partition_by_const_column(cluster): @@ -260,6 +268,7 @@ def test_partition_by_const_column(cluster): ) azure_query(node, f"INSERT INTO test_partitioned_const_write VALUES {values}") assert values_csv == get_azure_file_content("test_88.csv", port) + azure_query(node, "DROP TABLE test_partitioned_const_write") def test_truncate(cluster): @@ -275,6 +284,7 @@ def test_truncate(cluster): azure_query(node, "TRUNCATE TABLE test_truncate") with pytest.raises(Exception): print(get_azure_file_content("test_truncate.csv", port)) + azure_query(node, "DROP TABLE test_truncate") def test_simple_read_write(cluster): @@ -291,6 +301,7 @@ def test_simple_read_write(cluster): assert get_azure_file_content("test_simple_read_write.csv", port) == '1,"a"\n' print(azure_query(node, "SELECT * FROM test_simple_read_write")) assert azure_query(node, "SELECT * FROM test_simple_read_write") == "1\ta\n" + azure_query(node, "DROP TABLE test_simple_read_write") def test_create_new_files_on_insert(cluster): @@ -343,6 +354,7 @@ def test_overwrite(cluster): result = azure_query(node, f"select count() from test_overwrite") assert int(result) == 200 + azure_query(node, f"DROP TABLE test_overwrite") def test_insert_with_path_with_globs(cluster): @@ -355,6 +367,7 @@ def test_insert_with_path_with_globs(cluster): node.query_and_get_error( f"insert into table function test_insert_globs SELECT number, randomString(100) FROM numbers(500)" ) + azure_query(node, f"DROP TABLE test_insert_globs") def test_put_get_with_globs(cluster): @@ -363,6 +376,7 @@ def test_put_get_with_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): 
path = "{}/{}_{}/{}.csv".format( @@ -371,6 +385,8 @@ def test_put_get_with_globs(cluster): max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_put_{i}_{j}") + azure_query( node, f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -391,6 +407,9 @@ def test_put_get_with_globs(cluster): bucket="cont", max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_azure_glob_scheherazade(cluster): @@ -399,12 +418,14 @@ def test_azure_glob_scheherazade(cluster): values = "(1, 1, 1)" nights_per_job = 1001 // 30 jobs = [] + used_names = [] for night in range(0, 1001, nights_per_job): def add_tales(start, end): for i in range(start, end): path = "night_{}/tale.csv".format(i) unique_num = random.randint(1, 10000) + used_names.append(f"test_scheherazade_{i}_{unique_num}") azure_query( node, f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -432,6 +453,9 @@ def test_azure_glob_scheherazade(cluster): ) query = "select count(), sum(column1), sum(column2), sum(column3) from test_glob_select_scheherazade" assert azure_query(node, query).splitlines() == ["1001\t1001\t1001\t1001"] + azure_query(node, "DROP TABLE test_glob_select_scheherazade") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") @pytest.mark.parametrize( @@ -505,6 +529,8 @@ def test_schema_inference_no_globs(cluster): assert azure_query(node, query).splitlines() == [ "499500\t2890\t332833500\ttest_schema_inference_no_globs.csv\tcont/test_schema_inference_no_globs.csv" ] + azure_query(node, f"DROP TABLE test_schema_inference_src") + azure_query(node, f"DROP TABLE test_select_inference") def test_schema_inference_from_globs(cluster): @@ -513,6 +539,7 @@ def test_schema_inference_from_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( @@ -520,6 +547,7 @@ def test_schema_inference_from_globs(cluster): ) max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_schema_{i}_{j}") azure_query( node, @@ -545,6 +573,9 @@ def test_schema_inference_from_globs(cluster): bucket="cont", max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select_inference") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_simple_write_account_string_table_function(cluster): @@ -594,7 +625,7 @@ def test_simple_write_named_collection_1_table_function(cluster): azure_query( node, - "TRUNCATE TABLE drop_table", + "DROP TABLE drop_table", ) @@ -605,6 +636,7 @@ def test_simple_write_named_collection_2_table_function(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f" container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a')", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_simple_write_named_2_tf.csv", port)) assert get_azure_file_content("test_simple_write_named_2_tf.csv", port) == '1,"a"\n' @@ -628,6 +660,7 @@ def test_put_get_with_globs_tf(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = 
'{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -648,7 +681,7 @@ def test_schema_inference_no_globs_tf(cluster): query = ( f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"container='cont', blob_path='test_schema_inference_no_globs_tf.csv', format='CSVWithNames', structure='{table_format}') " - f"SELECT number, toString(number), number * number FROM numbers(1000)" + f"SELECT number, toString(number), number * number FROM numbers(1000) SETTINGS azure_truncate_on_insert=1" ) azure_query(node, query) @@ -681,7 +714,7 @@ def test_schema_inference_from_globs_tf(cluster): f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}" ) - azure_query(node, query) + azure_query(node, query, settings={"azure_truncate_on_insert": 1}) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -708,6 +741,7 @@ def test_partition_by_tf(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', " f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv", port) @@ -727,6 +761,7 @@ def test_filter_using_file(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont', '{filename}', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', " f"'{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( @@ -744,7 +779,7 @@ def test_read_subcolumns(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -794,7 +829,7 @@ def test_read_subcolumn_time(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a UInt32') select (42)", + f" 'a UInt32') select (42) SETTINGS azure_truncate_on_insert=1", ) res = node.query( @@ -825,6 +860,7 @@ def test_function_signatures(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES 
(1),(2),(3)", + settings={"azure_truncate_on_insert": 1}, ) # " - connection_string, container_name, blobpath\n" @@ -939,11 +975,13 @@ def test_union_schema_inference_mode(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1)", + settings={"azure_truncate_on_insert": 1}, ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2)", + settings={"azure_truncate_on_insert": 1}, ) node.query("system drop schema cache for azure") @@ -981,6 +1019,7 @@ def test_union_schema_inference_mode(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error')", + settings={"azure_truncate_on_insert": 1}, ) error = azure_query( @@ -1002,7 +1041,7 @@ def test_schema_inference_cache(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') " - f"select * from numbers(100)", + f"select * from numbers(100) SETTINGS azure_truncate_on_insert=1", ) time.sleep(1) @@ -1209,19 +1248,19 @@ def test_filtering_by_file_or_path(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter1.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_filter3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3 SETTINGS azure_truncate_on_insert=1", ) node.query( @@ -1233,7 +1272,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish' ORDER BY event_time_microseconds DESC LIMIT 1" ) assert int(result) == 1 @@ -1245,19 +1284,19 @@ def test_size_virtual_column(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column1.tsv', 
'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_size_virtual_column3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111 SETTINGS azure_truncate_on_insert=1", ) result = azure_query( @@ -1280,7 +1319,7 @@ def test_format_detection(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -1350,7 +1389,7 @@ def test_write_to_globbed_partitioned_path(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" error = azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42 SETTINGS azure_truncate_on_insert=1", expect_error="true", ) @@ -1462,3 +1501,115 @@ def test_insert_create_new_file(cluster): assert TSV(res) == TSV( "test_create_new_file.csv\t1\ntest_create_new_file.1.csv\t2\n" ) + + +def test_hive_partitioning_with_one_parameter(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values = f"('Elizabeth', 'Gordon')" + path = "a/column1=Elizabeth/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1 FROM 
azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}')" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == ["Gordon"] + + +def test_hive_partitioning_with_two_parameters(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" + ) + assert azure_query( + node, query, settings={"use_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] + + +def test_hive_partitioning_without_setting(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, + ) + + query = ( + f"SELECT column1, column2, _file, _path, 
_column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"use_hive_partitioning": 0}) diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py index 6c5e2d20ca5..04baf007c69 100644 --- a/tests/integration/test_storage_azure_blob_storage/test_cluster.py +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -72,6 +72,7 @@ def test_select_all(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1'," f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String') " f"VALUES (1, 'a'), (2, 'b')", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_cluster_select_all.csv", port)) @@ -101,6 +102,7 @@ def test_count(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " f"'auto', 'key UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_cluster_count.csv", port)) @@ -129,6 +131,7 @@ def test_union_all(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -180,6 +183,7 @@ def test_skip_unavailable_shards(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -200,6 +204,7 @@ def test_unset_skip_unavailable_shards(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -218,6 +223,7 @@ def test_cluster_with_named_collection(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -249,6 +255,7 @@ def test_partition_parallel_reading_with_cluster(cluster): f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 
'{filename}', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') " f"PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv", port) @@ -272,12 +279,12 @@ def test_format_detection(cluster): azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10) SETTINGS azure_truncate_on_insert=1", ) azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10) SETTINGS azure_truncate_on_insert=1", ) expected_desc_result = azure_query( diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index aef5ddb3675..fdbf7c5bacb 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -3,7 +3,9 @@ import os import pytest import uuid import time +import re from helpers.cluster import ClickHouseCluster, is_arm +from helpers.client import QueryRuntimeException from helpers.test_tools import TSV from pyhdfs import HdfsClient @@ -1255,6 +1257,55 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): assert int(result) == 44 +def test_hive_partitioning_with_one_parameter(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + + r = node1.query( + "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + settings={"use_hive_partitioning": 1}, + ) + assert r == f"Elizabeth\n" + + +def test_hive_partitioning_with_two_parameters(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + + r = node1.query( + "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"use_hive_partitioning": 1}, + ) + assert r == f"Gordon\n" + + +def test_hive_partitioning_without_setting(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with 
pytest.raises(QueryRuntimeException, match=pattern): + node1.query( + f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"use_hive_partitioning": 0}, + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp new file mode 100644 index 00000000000..4f3eabe22f0 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp @@ -0,0 +1,6 @@ +@0x99f75f775fe63dae; + +struct StringKeyValuePair { + key@0 : Text; + value@1 : Text; +} diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format new file mode 100644 index 00000000000..83dff6ce401 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format @@ -0,0 +1 @@ +(key = ${key:CSV}, value = ${value:CSV}) diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto new file mode 100644 index 00000000000..71905c63bdf --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message StringKeyValuePair { + string key = 1; + string value = 2; +} diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 4b6c9922d74..bef90e1b9d3 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -1019,7 +1019,7 @@ def test_kafka_formats(kafka_cluster, create_query_generator): DROP TABLE IF EXISTS test.kafka_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; """.format( topic_name=topic_name, @@ -2460,7 +2460,7 @@ def test_kafka_commit_on_block_write(kafka_cluster, create_query_generator): (generate_old_create_table_query, "kafka.*Committed offset 2.*virt2_[01]"), ( generate_new_create_table_query, - r"kafka.*Saved offset 2[0-9]* for topic-partition \[virt2_[01]:[0-9]+", + r"kafka.*Saved offset 2 for topic-partition \[virt2_[01]:[0-9]+", ), ], ) @@ -2494,7 +2494,7 @@ def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line) f""" {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; """ ) @@ -2729,7 +2729,7 @@ def test_kafka_produce_key_timestamp(kafka_cluster, create_query_generator, log_ DROP TABLE IF EXISTS test.consumer; {writer_create_query}; {reader_create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM 
test.kafka; """ ) @@ -2865,7 +2865,7 @@ def test_kafka_produce_consume_avro(kafka_cluster, create_query_generator): {writer_create_query}; {reader_create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT key, value FROM test.kafka; """ ) @@ -3537,7 +3537,7 @@ def test_bad_reschedule(kafka_cluster, create_query_generator): f""" {create_query}; - CREATE MATERIALIZED VIEW test.destination Engine=Log AS + CREATE MATERIALIZED VIEW test.destination ENGINE=MergeTree ORDER BY tuple() AS SELECT key, now() as consume_ts, @@ -3745,7 +3745,7 @@ def test_kafka_unavailable(kafka_cluster, create_query_generator, do_direct_read f""" {create_query}; - CREATE MATERIALIZED VIEW test.destination_unavailable Engine=Log AS + CREATE MATERIALIZED VIEW test.destination_unavailable ENGINE=MergeTree ORDER BY tuple() AS SELECT key, now() as consume_ts, @@ -4267,12 +4267,12 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator {create_query}; DROP TABLE IF EXISTS test.kafka_data_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name} WHERE length(_error) = 0; DROP TABLE IF EXISTS test.kafka_errors_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT {raw_message} as raw_message, _error as error, _topic as topic, _partition as partition, _offset as offset FROM test.kafka_{format_name} WHERE length(_error) > 0; """ @@ -4796,7 +4796,7 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): DROP TABLE IF EXISTS test.kafka; {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.kafka; """ ) @@ -4875,7 +4875,7 @@ def test_row_based_formats(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); @@ -4982,7 +4982,7 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; @@ -5362,7 +5362,7 @@ def test_formats_errors(kafka_cluster): input_format_with_names_use_header=0, format_schema='key_value_message:Message'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; """ ) diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py new file mode 100644 index 00000000000..fc10a07f239 --- /dev/null +++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py @@ -0,0 +1,243 @@ +import 
time +import logging + +import pytest +from helpers.cluster import ClickHouseCluster, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from kafka.admin import NewTopic + +if is_arm(): + pytestmark = pytest.mark.skip + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kafka.xml", "configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_kafka=True, + with_zookeeper=True, # For Replicated Table + macros={ + "kafka_broker": "kafka1", + "kafka_topic_old": "old", + "kafka_group_name_old": "old", + "kafka_topic_new": "new", + "kafka_group_name_new": "new", + "kafka_client_id": "instance", + "kafka_format_json_each_row": "JSONEachRow", + }, + clickhouse_path_dir="clickhouse_path", +) + + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + # logging.debug("kafka is available - running test") + yield # run test + + +def kafka_create_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + logging.debug( + f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" + ) + topics_list = [ + NewTopic( + name=topic_name, + num_partitions=num_partitions, + replication_factor=replication_factor, + topic_configs=config, + ) + ] + retries = 0 + while True: + try: + admin_client.create_topics(new_topics=topics_list, validate_only=False) + logging.debug("Admin client succeed") + return + except Exception as e: + retries += 1 + time.sleep(0.5) + if retries < max_retries: + logging.warning(f"Failed to create topic {e}") + else: + raise + + +def kafka_delete_topic(admin_client, topic, max_retries=50): + result = admin_client.delete_topics([topic]) + for topic, e in result.topic_error_codes: + if e == 0: + logging.debug(f"Topic {topic} deleted") + else: + logging.error(f"Failed to delete topic {topic}: {e}") + + retries = 0 + while True: + topics_listed = admin_client.list_topics() + logging.debug(f"TOPICS LISTED: {topics_listed}") + if topic not in topics_listed: + return + else: + retries += 1 + time.sleep(0.5) + if retries > max_retries: + raise Exception(f"Failed to delete topics {topic}, {result}") + + +def test_kafka_produce_http_interface_row_based_format(kafka_cluster): + # reproduction of #61060 with validating the written messages + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic_prefix = "http_row_" + + # It is important to have: + # - long enough messages + # - enough messages + # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages + # For the number of messages it seems like at least 3 messages is necessary + expected_key = "01234567890123456789" + expected_value = "aaaaabbbbbccccc" + + insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" + insert_query_template = "INSERT INTO {table_name} " + insert_query_end + + extra_settings = { + "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", + "CapnProto": ", 
kafka_schema='string_key_value:StringKeyValuePair'",
+        "Template": ", format_template_row='string_key_value.format'",
+    }
+
+    # Only the formats that can be used as both input and output format are tested
+    # Reasons to exclude the following formats:
+    #  - JSONStrings: not actually an input format
+    #  - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken,
+    #    because the producer can write multiple rows into the same message, which makes them impossible to parse properly. Should be added after #67549 is fixed.
+    #  - ProtobufList: I didn't want to deal with the envelope and stuff
+    #  - Npy: supports only single column
+    #  - LineAsString: supports only single column
+    #  - RawBLOB: supports only single column
+    formats_to_test = [
+        "TabSeparated",
+        "TabSeparatedRaw",
+        "TabSeparatedWithNames",
+        "TabSeparatedWithNamesAndTypes",
+        "TabSeparatedRawWithNames",
+        "TabSeparatedRawWithNamesAndTypes",
+        "Template",
+        "CSV",
+        "CSVWithNames",
+        "CSVWithNamesAndTypes",
+        "CustomSeparated",
+        "CustomSeparatedWithNames",
+        "CustomSeparatedWithNamesAndTypes",
+        "Values",
+        "JSON",
+        "JSONColumns",
+        "JSONColumnsWithMetadata",
+        "JSONCompact",
+        "JSONCompactColumns",
+        "JSONEachRow",
+        "JSONStringsEachRow",
+        "JSONCompactEachRow",
+        "JSONCompactEachRowWithNames",
+        "JSONCompactEachRowWithNamesAndTypes",
+        "JSONCompactStringsEachRow",
+        "JSONCompactStringsEachRowWithNames",
+        "JSONCompactStringsEachRowWithNamesAndTypes",
+        "JSONObjectEachRow",
+        "BSONEachRow",
+        "TSKV",
+        "Protobuf",
+        "Avro",
+        "Parquet",
+        "Arrow",
+        "ArrowStream",
+        "ORC",
+        "RowBinary",
+        "RowBinaryWithNames",
+        "RowBinaryWithNamesAndTypes",
+        "Native",
+        "CapnProto",
+        "MsgPack",
+    ]
+    for format in formats_to_test:
+        logging.debug(f"Creating tables and writing messages to {format}")
+        topic = topic_prefix + format
+        kafka_create_topic(admin_client, topic)
+
+        extra_setting = extra_settings.get(format, "")
+
+        # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.view_{topic};
+            DROP TABLE IF EXISTS test.consumer_{topic};
+            CREATE TABLE test.kafka_writer_{topic} (key String, value String)
+                ENGINE = Kafka
+                SETTINGS kafka_broker_list = 'kafka1:19092',
+                         kafka_topic_list = '{topic}',
+                         kafka_group_name = '{topic}',
+                         kafka_format = '{format}',
+                         kafka_max_rows_per_message = 2 {extra_setting};
+
+            CREATE TABLE test.kafka_{topic} (key String, value String)
+                ENGINE = Kafka
+                SETTINGS kafka_broker_list = 'kafka1:19092',
+                         kafka_topic_list = '{topic}',
+                         kafka_group_name = '{topic}',
+                         kafka_format = '{format}' {extra_setting};
+
+            CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS
+                SELECT key, value FROM test.kafka_{topic};
+            """
+        )
+        instance.http_query(
+            insert_query_template.format(table_name="test.kafka_writer_" + topic),
+            method="POST",
+        )
+
+    expected = f"""\
+{expected_key}\t{expected_value}
+{expected_key}\t{expected_value}
+{expected_key}\t{expected_value}
+"""
+    # give some time for the readers to read the messages
+    for format in formats_to_test:
+        logging.debug(f"Checking result for {format}")
+        topic = topic_prefix + format
+
+        result = instance.query_with_retry(
+            f"SELECT * FROM test.view_{topic}",
+            check_callback=lambda res: res.count("\n") == 3,
+        )
+
+        assert TSV(result) == TSV(expected)
+
+        kafka_delete_topic(admin_client, topic)
+
+
+if __name__ == "__main__":
+    cluster.start()
+    input("Cluster created, press any key to destroy...")
+
cluster.shutdown()
diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py
index 713b327eb76..cfecea5b3d6 100644
--- a/tests/integration/test_system_flush_logs/test.py
+++ b/tests/integration/test_system_flush_logs/test.py
@@ -4,7 +4,7 @@ import pytest
 
 from helpers.cluster import ClickHouseCluster
-from helpers.test_tools import assert_eq_with_retry
+from helpers.test_tools import assert_eq_with_retry, assert_logs_contain_with_retry, TSV
 
 cluster = ClickHouseCluster(__file__)
 node = cluster.add_instance(
@@ -12,17 +12,6 @@ node = cluster.add_instance(
     stay_alive=True,
 )
 
-system_logs = [
-    # enabled by default
-    ("system.text_log", 1),
-    ("system.query_log", 1),
-    ("system.query_thread_log", 1),
-    ("system.part_log", 1),
-    ("system.trace_log", 1),
-    ("system.metric_log", 1),
-    ("system.error_log", 1),
-]
-
 
 @pytest.fixture(scope="module", autouse=True)
 def start_cluster():
@@ -33,22 +22,28 @@ def start_cluster():
         cluster.shutdown()
 
 
-@pytest.fixture(scope="function")
-def flush_logs():
+def test_system_logs_exists():
+    system_logs = [
+        ("system.text_log", 1),
+        ("system.query_log", 1),
+        ("system.query_thread_log", 1),
+        ("system.part_log", 1),
+        ("system.trace_log", 1),
+        ("system.metric_log", 1),
+        ("system.error_log", 1),
+    ]
+
     node.query("SYSTEM FLUSH LOGS")
-
-
-@pytest.mark.parametrize("table,exists", system_logs)
-def test_system_logs(flush_logs, table, exists):
-    q = "SELECT * FROM {}".format(table)
-    if exists:
-        node.query(q)
-    else:
-        response = node.query_and_get_error(q)
-        assert (
-            "Table {} does not exist".format(table) in response
-            or "Unknown table expression identifier '{}'".format(table) in response
-        )
+    for table, exists in system_logs:
+        q = "SELECT * FROM {}".format(table)
+        if exists:
+            node.query(q)
+        else:
+            response = node.query_and_get_error(q)
+            assert (
+                "Table {} does not exist".format(table) in response
+                or "Unknown table expression identifier '{}'".format(table) in response
+            )
 
 
 # Logic is tricky, let's check that there is no hang in case of message queue
@@ -67,14 +62,19 @@ def test_system_logs_non_empty_queue():
 
 
 def test_system_suspend():
-    node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;")
-    node.query("INSERT INTO t VALUES (now());")
-    node.query("SYSTEM SUSPEND FOR 1 SECOND;")
-    node.query("INSERT INTO t VALUES (now());")
-    assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;")
+    try:
+        node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;")
+        node.query("INSERT INTO t VALUES (now());")
+        node.query("SYSTEM SUSPEND FOR 1 SECOND;")
+        node.query("INSERT INTO t VALUES (now());")
+        assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;")
+    finally:
+        node.query("DROP TABLE IF EXISTS t;")
 
 
 def test_log_max_size(start_cluster):
+    # we deliberately misconfigure here: buffer_size_rows_flush_threshold > max_size_rows, flush_interval_milliseconds is huge
+    # so neither auto flush by size nor by time has a chance to happen
     node.exec_in_container(
         [
             "bash",
@@ -83,6 +83,7 @@
                         1000000
+                        1000000
                         10
                         10
@@ -91,11 +92,24 @@
         """,
         ]
     )
-    node.restart_clickhouse()
-    for i in range(10):
-        node.query(f"select {i}")
-    assert node.query("select count() >= 10 from system.query_log") == "1\n"
+    node.query("SYSTEM FLUSH LOGS")
+    node.query(f"TRUNCATE TABLE IF EXISTS system.query_log")
+    node.restart_clickhouse()
+
+    # all log records above max_size_rows are lost
+    # The accepted log records are never flushed until system flush logs is called by
us + for i in range(21): + node.query(f"select {i}") + node.query("system flush logs") + + assert_logs_contain_with_retry( + node, "Queue had been full at 0, accepted 10 logs, ignored 34 logs." + ) + assert node.query( + "select count() >= 10, count() < 20 from system.query_log" + ) == TSV([[1, 1]]) + node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] ) diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 1bdb1fe3261..8b84734ed02 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -33,124 +33,139 @@ def test_system_logs_recreate(): "error_log", ] - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + try: + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 1 - ) - - # NOTE: we use zzz- prefix to make it the last file, - # so that it will be applied last. - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - ENGINE = Null - - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" not in node.query( - f"SHOW CREATE TABLE system.{table}" - ) - assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + assert "ENGINE = Null" not in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 2 - ) - - # apply only storage_policy for all system tables - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - system_tables - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - import logging - - for table in system_logs: - create_table_sql = node.query(f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw") - logging.debug( - "With storage policy, SHOW CREATE TABLE system.%s is: %s", - table, - create_table_sql, - ) - assert "ENGINE = MergeTree" in create_table_sql - assert "ENGINE = Null" not in create_table_sql - assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 1 ) - == 3 - ) - for table in system_logs: - node.exec_in_container( - ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + # NOTE: we use zzz- prefix to make it the last file, + # so that it will be applied last. 
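+        # (the server applies config.d/*.xml in lexicographic order, so a zzz-
+        # prefixed file should be merged last and take precedence over any earlier
+        # override of the same section)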
+        for table in system_logs:
+            node.exec_in_container(
+                [
+                    "bash",
+                    "-c",
+                    f"""echo "
+
+                    <{table}>
+                        ENGINE = Null
+
+
+
+                " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml
+                """,
+                ]
             )
-
-    node.restart_clickhouse()
-    node.query("SYSTEM FLUSH LOGS")
-    for table in system_logs:
-        assert "ENGINE = MergeTree" not in node.query(
-            f"SHOW CREATE TABLE system.{table}"
-        )
-        assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}")
-        assert (
-            len(
-                node.query(f"SHOW TABLES FROM system LIKE '{table}%'")
-                .strip()
-                .split("\n")
+        node.restart_clickhouse()
+        node.query("SYSTEM FLUSH LOGS")
+        for table in system_logs:
+            assert "ENGINE = MergeTree" not in node.query(
+                f"SHOW CREATE TABLE system.{table}"
+            )
+            assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}")
+            assert (
+                len(
+                    node.query(f"SHOW TABLES FROM system LIKE '{table}%'")
+                    .strip()
+                    .split("\n")
+                )
+                == 2
+            )
+
+        # apply only storage_policy for all system tables
+        for table in system_logs:
+            node.exec_in_container(
+                [
+                    "bash",
+                    "-c",
+                    f"""echo "
+
+                    <{table}>
+                        system_tables
+
+
+                " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml
+                """,
+                ]
+            )
+        node.restart_clickhouse()
+        node.query("SYSTEM FLUSH LOGS")
+        import logging
+
+        for table in system_logs:
+            create_table_sql = node.query(
+                f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw"
+            )
+            logging.debug(
+                "With storage policy, SHOW CREATE TABLE system.%s is: %s",
+                table,
+                create_table_sql,
+            )
+            assert "ENGINE = MergeTree" in create_table_sql
+            assert "ENGINE = Null" not in create_table_sql
+            assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql
+            assert (
+                len(
+                    node.query(f"SHOW TABLES FROM system LIKE '{table}%'")
+                    .strip()
+                    .split("\n")
+                )
+                == 3
+            )
+
+        for table in system_logs:
+            node.exec_in_container(
+                ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"]
+            )
+
+        node.restart_clickhouse()
+        node.query("SYSTEM FLUSH LOGS")
+        for table in system_logs:
+            assert "ENGINE = MergeTree" in node.query(
+                f"SHOW CREATE TABLE system.{table}"
+            )
+            assert "ENGINE = Null" not in node.query(
+                f"SHOW CREATE TABLE system.{table}"
+            )
+            assert (
+                len(
+                    node.query(f"SHOW TABLES FROM system LIKE '{table}%'")
+                    .strip()
+                    .split("\n")
+                )
+                == 4
+            )
+
+        node.query("SYSTEM FLUSH LOGS")
+        # Ensure that there were no superfluous RENAMEs
+        # IOW that the table is created only when the structure is indeed different.
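+        # (background, assuming the usual system-log rotation behaviour: when the
+        # configured structure differs from the existing table, the server renames
+        # the old table to <table>_N and creates a fresh one, which is why the
+        # LIKE '{table}%' counts above grow only on a real structure change)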
+        for table in system_logs:
+            assert (
+                len(
+                    node.query(f"SHOW TABLES FROM system LIKE '{table}%'")
+                    .strip()
+                    .split("\n")
+                )
+                == 4
+            )
+    finally:
+        for table in system_logs:
+            for suffix in range(3):
+                node.query(f"DROP TABLE IF EXISTS system.{table}_{suffix} sync")
 
 
 def test_drop_system_log():
@@ -173,11 +188,20 @@ def test_drop_system_log():
     node.query("system flush logs")
     node.query("select 2")
     node.query("system flush logs")
-    assert node.query("select count() > 0 from system.query_log") == "1\n"
+    assert node.query("select count() >= 2 from system.query_log") == "1\n"
+
     node.query("drop table system.query_log sync")
     node.query("select 3")
     node.query("system flush logs")
-    assert node.query("select count() > 0 from system.query_log") == "1\n"
+    assert node.query("select count() >= 1 from system.query_log") == "1\n"
+
+    node.query("drop table system.query_log sync")
+    node.restart_clickhouse()
+    node.query("system flush logs")
+    assert (
+        node.query("select count() >= 0 from system.query_log") == "1\n"
+    )  # we check that query_log just exists
+
     node.exec_in_container(
         ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"]
     )
diff --git a/tests/integration/test_version_update/configs/log_conf.xml b/tests/integration/test_version_update/configs/log_conf.xml
index f9d15e572aa..27c7107ce5e 100644
--- a/tests/integration/test_version_update/configs/log_conf.xml
+++ b/tests/integration/test_version_update/configs/log_conf.xml
@@ -1,4 +1,4 @@
-
+
     
         trace
         /var/log/clickhouse-server/log.log
@@ -8,4 +8,4 @@
         /var/log/clickhouse-server/stderr.log
         /var/log/clickhouse-server/stdout.log
     
-
+
diff --git a/tests/performance/insert_select_squashing.xml b/tests/performance/insert_select_squashing.xml
new file mode 100644
index 00000000000..4c2c88f3d22
--- /dev/null
+++ b/tests/performance/insert_select_squashing.xml
@@ -0,0 +1,23 @@
+
+
+        1000
+
+
+
+CREATE TABLE squash_performance
+(
+    s1 String,
+    s2 Nullable(String),
+    a1 Array(Array(String)),
+    a2 Array(Array(UInt32)),
+    m1 Map(String, Array(String)),
+    m2 Map(String, Array(UInt64)),
+    t Tuple(String, Array(String), Map(String, String))
+)
+ENGINE = Null;
+
+
+    INSERT INTO squash_performance SELECT * FROM generateRandom(42) LIMIT 500000
+
+    DROP TABLE IF EXISTS squash_performance
+
diff --git a/tests/performance/insert_select_squashing_dynamic.xml b/tests/performance/insert_select_squashing_dynamic.xml
new file mode 100644
index 00000000000..f7f600fd8bd
--- /dev/null
+++ b/tests/performance/insert_select_squashing_dynamic.xml
@@ -0,0 +1,59 @@
+
+
+        1000
+        0
+        1
+
+
+
+CREATE TABLE dynamic_squash_performance_1
+(
+    d Dynamic
+)
+ENGINE = Null;
+
+
+
+CREATE TABLE dynamic_squash_performance_2
+(
+    d Dynamic(max_types=6)
+)
+ENGINE = Null;
+
+
+
+CREATE TABLE src_dynamic_squash_performance_1
+(
+    d Dynamic
+)
+ENGINE = Memory;
+
+
+
+CREATE TABLE src_dynamic_squash_performance_2
+(
+    d Dynamic(max_types=6)
+)
+ENGINE = Memory;
+
+
+
+
+
+
+
+
+
+
+
+
+    INSERT INTO dynamic_squash_performance_1 SELECT number::Dynamic FROM numbers(10000000)
+    INSERT INTO dynamic_squash_performance_1 SELECT range(number % 100)::Dynamic FROM numbers(2000000)
+    INSERT INTO dynamic_squash_performance_1 SELECT * FROM src_dynamic_squash_performance_1
+    INSERT INTO dynamic_squash_performance_2 SELECT * FROM src_dynamic_squash_performance_2
+
+    DROP TABLE IF EXISTS dynamic_squash_performance_1
+    DROP TABLE IF EXISTS dynamic_squash_performance_2
+    DROP TABLE IF EXISTS src_dynamic_squash_performance_1
+    DROP TABLE IF EXISTS src_dynamic_squash_performance_2
+
+
diff
--git a/tests/performance/insert_select_squashing_variant.xml b/tests/performance/insert_select_squashing_variant.xml new file mode 100644 index 00000000000..5c59fc7b50f --- /dev/null +++ b/tests/performance/insert_select_squashing_variant.xml @@ -0,0 +1,34 @@ + + + 1000 + 0 + 1 + 1 + + + +CREATE TABLE variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Null; + + + +CREATE TABLE src_variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Memory; + + + + + + + INSERT INTO variant_squash_performance SELECT * FROM src_variant_squash_performance + + DROP TABLE IF EXISTS variant_squash_performance + DROP TABLE IF EXISTS src_variant_squash_performance + + diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index 2d49664b280..073301104d2 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -67,3 +67,7 @@ SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 2020-01-10 00:00:00 SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 617019bd2c6..fb2b4b4949a 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -36,3 +36,8 @@ SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, I SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); + +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index 24083d7d40b..0fd73c7bcec 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -16,25 +16,25 @@ 7 all_3_3_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 7 all_4_4_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 8 1 -1 1 AddPart 1 1 1 1 all_1_1_0 -2 1 Begin 1 1 1 1 -2 1 AddPart 1 1 1 1 all_2_2_0 -2 1 Rollback 1 1 1 1 -3 1 Begin 1 1 1 1 -3 1 AddPart 1 1 1 1 all_3_3_0 -3 1 Commit 1 1 1 0 -1 1 LockPart 1 1 1 1 all_2_2_0 -4 1 Begin 1 1 1 1 -4 1 AddPart 1 1 1 1 all_4_4_0 -4 1 Commit 1 1 1 0 -5 1 
Begin 1 1 1 1
-5 1 AddPart 1 1 1 1 all_5_5_0
-5 1 LockPart 1 1 1 1 all_1_1_0
-5 1 LockPart 1 1 1 1 all_3_3_0
-5 1 LockPart 1 1 1 1 all_4_4_0
-5 1 LockPart 1 1 1 1 all_5_5_0
-5 1 UnlockPart 1 1 1 1 all_1_1_0
-5 1 UnlockPart 1 1 1 1 all_3_3_0
-5 1 UnlockPart 1 1 1 1 all_4_4_0
-5 1 UnlockPart 1 1 1 1 all_5_5_0
-5 1 Rollback 1 1 1 1
+1 AddPart 1 1 1 1 all_1_1_0
+2 Begin 1 1 1 1
+2 AddPart 1 1 1 1 all_2_2_0
+2 Rollback 1 1 1 1
+3 Begin 1 1 1 1
+3 AddPart 1 1 1 1 all_3_3_0
+3 Commit 1 1 1 0
+1 LockPart 1 1 1 1 all_2_2_0
+4 Begin 1 1 1 1
+4 AddPart 1 1 1 1 all_4_4_0
+4 Commit 1 1 1 0
+5 Begin 1 1 1 1
+5 AddPart 1 1 1 1 all_5_5_0
+5 LockPart 1 1 1 1 all_1_1_0
+5 LockPart 1 1 1 1 all_3_3_0
+5 LockPart 1 1 1 1 all_4_4_0
+5 LockPart 1 1 1 1 all_5_5_0
+5 UnlockPart 1 1 1 1 all_1_1_0
+5 UnlockPart 1 1 1 1 all_3_3_0
+5 UnlockPart 1 1 1 1 all_4_4_0
+5 UnlockPart 1 1 1 1 all_5_5_0
+5 Rollback 1 1 1 1
diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql
index a809e4196e9..581b45cd15c 100644
--- a/tests/queries/0_stateless/01172_transaction_counters.sql
+++ b/tests/queries/0_stateless/01172_transaction_counters.sql
@@ -42,7 +42,6 @@ rollback;
 
 system flush logs;
 select indexOf((select arraySort(groupUniqArray(tid)) from system.transactions_info_log where database=currentDatabase() and table='txn_counters'), tid),
-       (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100,
        type,
        thread_id!=0,
        length(query_id)=length(queryID()) or type='Commit' and query_id='', -- ignore fault injection after commit
diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql
index 0f80d659e92..0d745e44b10 100644
--- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql
+++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql
@@ -1,4 +1,5 @@
--- Tags: long
+-- Tags: long, no-parallel
+-- the no-parallel tag is set to prevent a timeout of this test
 
 drop table if exists t;
 
diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference
index b70a1cb7c75..2c66db91737 100644
--- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference
+++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference
@@ -1,3 +1,9 @@
 8
+8
+1
 4
 4
+1
+4
+4
+1
diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh
new file mode 100755
index 00000000000..1482730af2c
--- /dev/null
+++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Tags: no-random-merge-tree-settings, no-random-settings
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. 
"$CURDIR"/../shell_config.sh + +sql="toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]))" + +# Create the table and fill it +$CLICKHOUSE_CLIENT -n --query=" + CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=(\"[^\"]*?\"|[^\",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); + INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" + +function test() +{ + # Execute two queries and compare if they have similar memory usage: + # The first query uses the default column value, while the second explicitly uses the same SQL as the default value. + # Follow https://github.com/ClickHouse/ClickHouse/issues/17317 for more info about the issue + where=$1 + + uuid_1=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 + uuid_2=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 + $CLICKHOUSE_CLIENT -n --query=" + SYSTEM FLUSH LOGS; + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage OR + memory_2.memory_usage <= 1.2 * memory_1.memory_usage FROM memory_1, memory_2;" +} + +test "" +test "PREWHERE 15Id < 4" +test "WHERE 15Id < 4" diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql deleted file mode 100644 index 2eec08635eb..00000000000 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Tags: no-random-merge-tree-settings - -CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); - -INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); - -ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1])); - -SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference index f314e22e519..6500560e8fc 100644 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.reference +++ 
b/tests/queries/0_stateless/02122_join_group_by_timeout.reference
@@ -1,4 +1,6 @@
-Code: 159
-0
+Code: 159
+query_duration 1
+0
+query_duration 1
 Code: 159
 0
diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh
index 8380c5dbd0c..b4644878544 100755
--- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh
+++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh
@@ -1,27 +1,23 @@
 #!/usr/bin/env bash
-# Tags: no-debug
-
-# no-debug: Query is canceled by timeout after max_execution_time,
-# but sending an exception to the client may hang
-# for more than MAX_PROCESS_WAIT seconds in a slow debug build,
-# and test will fail.
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-MAX_PROCESS_WAIT=5
-
-IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'")
-if [ "$IS_SANITIZER" -gt 0 ]; then
-    # Query may hang for more than 5 seconds, especially in tsan build
-    MAX_PROCESS_WAIT=15
+TIMEOUT=5
+IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'")
+if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then
+    # Increase the timeout in debug/sanitizer builds:
+    # - client is slow
+    # - stacktrace resolving is slow
+    TIMEOUT=15
 fi
 
 # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor
 ### Should be cancelled after 1 second and return a 159 exception (timeout)
-timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
-    "SELECT * FROM
+query_id=$(random_str 12)
+$CLICKHOUSE_CLIENT --query_id "$query_id" --max_execution_time 1 -q "
+    SELECT * FROM
 (
     SELECT a.name as n
     FROM
@@ -34,28 +30,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
     GROUP BY n
 )
 LIMIT 20
-    FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
+    FORMAT Null
+" 2>&1 | grep -m1 -o "Code: 159"
+$CLICKHOUSE_CLIENT -q "system flush logs"
+${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'"
+
 
 ### Should stop pulling data and return what has been generated already (return code 0)
-timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \
-    "SELECT a.name as n
-    FROM
-    (
-        SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
-    ) AS a,
-    (
-        SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
-    ) as b
-    FORMAT Null
-    SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
-    "
+query_id=$(random_str 12)
+$CLICKHOUSE_CLIENT --query_id "$query_id" -q "
+    SELECT a.name as n
+    FROM
+    (
+        SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000
+    ) AS a,
+    (
+        SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000
+    ) as b
+    FORMAT Null
+    SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break'
+"
 echo $?
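+# Cross-check against system.query_log that the 'break' really happened at about
+# 1 second of execution; the reference expects query_duration to round to 1 here
+# as well.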
+$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" # HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ - "SELECT * FROM +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d " + SELECT * FROM ( SELECT a.name as n FROM @@ -68,12 +71,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ - "SELECT a.name as n +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL" -d " + SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 @@ -83,5 +87,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +" echo $? diff --git a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh index 98b571c5968..91e85eabcb8 100755 --- a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # $2 - query function execute_query() { - ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -nq " + ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -q " ${2} " } @@ -18,7 +18,7 @@ function execute_query() # so we only to check the db.statement only function check_query_span_query_only() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query FROM system.opentelemetry_span_log @@ -31,7 +31,7 @@ ${CLICKHOUSE_CLIENT} -nq " function check_query_span() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query, attribute['clickhouse.read_rows'] as read_rows, @@ -47,7 +47,7 @@ ${CLICKHOUSE_CLIENT} -nq " # # Set up # -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; CREATE TABLE ${CLICKHOUSE_DATABASE}.opentelemetry_test (id UInt64) Engine=MergeTree Order By id; " @@ -79,4 +79,4 @@ check_query_span $query_id # ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh index ae3ea017fbb..178da822d41 100755 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists insert_select_progress_tcp; create table insert_select_progress_tcp(s UInt16) engine = MergeTree order by s; " diff --git a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh index 4c850a6ec9e..92f519a9f8a 100755 --- a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh +++ b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table; CREATE TABLE test_table ( @@ -74,7 +74,7 @@ $CLICKHOUSE_CLIENT -q "SELECT 1 AS constant_value, arrayMap(lambda_argument -> l $CLICKHOUSE_CLIENT -q "WITH 1 AS constant_value SELECT (SELECT constant_valu) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_compound; CREATE TABLE test_table_compound ( @@ -142,7 +142,7 @@ $CLICKHOUSE_CLIENT -q "SELECT cast(tuple(1), 'Tuple(value_1 String)') AS constan $CLICKHOUSE_CLIENT -q "WITH cast(tuple(1), 'Tuple(value_1 String)') AS constant_value SELECT (SELECT constant_value.value_) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value.value_1'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_1; CREATE TABLE test_table_1 ( @@ -185,7 +185,7 @@ $CLICKHOUSE_CLIENT -q "SELECT ((1))::Tuple(a Tuple(b UInt32)) AS t, t.a.c SETTIN $CLICKHOUSE_CLIENT -q "SELECT 1"; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE test_table; DROP TABLE test_table_compound; DROP TABLE test_table_1; diff --git a/tests/queries/0_stateless/02480_tets_show_full.sh b/tests/queries/0_stateless/02480_tets_show_full.sh index 5f5040ba128..50184857a1f 100755 --- a/tests/queries/0_stateless/02480_tets_show_full.sh +++ b/tests/queries/0_stateless/02480_tets_show_full.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) database=$($CLICKHOUSE_CLIENT -q 'SELECT currentDatabase()') -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS test_02480_table; DROP VIEW IF EXISTS test_02480_view; CREATE TABLE test_02480_table (id Int64) ENGINE=MergeTree ORDER BY id; diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 5303824d97c..4dc7a7fd99b 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; CREATE TABLE load_parts_refcounts (id UInt32) diff --git a/tests/queries/0_stateless/02494_query_cache_tag.reference b/tests/queries/0_stateless/02494_query_cache_tag.reference new file mode 100644 index 00000000000..f7be5c06ecf --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_tag.reference @@ -0,0 +1,12 @@ +1 +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc +--- +1 +1 +SELECT 1 SETTINGS use_query_cache = true +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc +--- +1 +1 +SELECT 1 SETTINGS use_query_cache = true abc +SELECT 1 SETTINGS use_query_cache = true def diff --git a/tests/queries/0_stateless/02494_query_cache_tag.sql b/tests/queries/0_stateless/02494_query_cache_tag.sql new file mode 100644 index 00000000000..62d36f6ebe6 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_tag.sql @@ -0,0 +1,34 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- Store the result a single query with a tag in the query cache and check that the system table knows about the tag +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; + +SELECT query, tag FROM system.query_cache; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- Store the result of the same query with two different tags. The cache should store two entries. +SELECT 1 SETTINGS use_query_cache = true; -- default query_cache_tag = '' +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; +SELECT query, tag FROM system.query_cache ORDER BY ALL; + +SELECT '---'; + +SYSTEM DROP QUERY CACHE; + +-- Like before but the tag is set standalone. 
+ +SET query_cache_tag = 'abc'; +SELECT 1 SETTINGS use_query_cache = true; + +SET query_cache_tag = 'def'; +SELECT 1 SETTINGS use_query_cache = true; + +SELECT query, tag FROM system.query_cache ORDER BY ALL; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..4d004f2f78f 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Projection + Before ORDER BY)) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 646e2501a99..6e132c55628 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -26,15 +26,15 @@ FROM ORDER BY number DESC ) ORDER BY number ASC" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled query_plan_remove_redundant_sorting" @@ -302,6 +302,27 @@ FROM )" run_query "$query" +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" + echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b6a2e3182df..dd5ac7bf706 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -464,6 +464,36 @@ Expression ((Project names + Projection)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query 
+WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Offset + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index aa43e81f131..27243dd47fa 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -13,6 +13,6 @@ $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SEL $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferMMap" && echo 'Fail' || echo 0 $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferOrdinary" && echo 1 || echo 'Fail' -$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -nq "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" +$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -q "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 3c06119e8d2..6fd42fa940a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -24,15 +24,15 @@ FROM ) )" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled $OPTIMIZATION_SETTING" diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index a0f228e6af4..c1aa24943c1 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -14,8 +14,8 @@ ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings databa ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = 
Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 506ac2331f2..a50539311cb 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; insert into data values (1); diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.sh b/tests/queries/0_stateless/02555_davengers_rename_chain.sh index 660a95846c4..196507dc72e 100755 --- a/tests/queries/0_stateless/02555_davengers_rename_chain.sh +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.sh @@ -46,7 +46,7 @@ tables["wrong_metadata_compact"]="min_bytes_for_wide_part = 10000000" for table in "${!tables[@]}"; do settings="${tables[$table]}" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS $table; CREATE TABLE $table( @@ -69,7 +69,7 @@ for table in "${!tables[@]}"; do wait_column "$table" "\`a1\` UInt64" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -82,7 +82,7 @@ for table in "${!tables[@]}"; do wait_mutation_loaded "$table" "b1 TO a" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -94,7 +94,7 @@ for table in "${!tables[@]}"; do wait_for_all_mutations "$table" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh index 509cd03f6c2..22b94e09b58 100755 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; +${CLICKHOUSE_CLIENT} --ignore-error --query "drop table if exists buffer_02572; drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" ${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" @@ -21,7 +21,7 @@ ${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" if [ $(( $(date +%s) - start )) -gt 6 ]; then # clickhouse test cluster is overloaded, will skip # ensure that the flush was not direct - ${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + ${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" fi # we cannot use OPTIMIZE, this will attach query context, so let's wait @@ -31,11 +31,11 @@ for _ in {1..100}; do done -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" +${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" ${CLICKHOUSE_CLIENT} --query="system flush logs;" ${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) from system.query_views_log where view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3;" \ No newline at end of file + group by 2, 3;" diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index 03e0f363d71..79253648475 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -26,7 +26,7 @@ read_methods=( for read_method in "${read_methods[@]}"; do query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "select * from data format Null settings max_local_read_bandwidth='1M', local_filesystem_read_method='$read_method'" - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT '$read_method', diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 4f6a300c5b3..c5776134673 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -nm -q " query_id=$(random_str 10) # writes 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds $CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data select * from numbers(1e6) settings max_local_write_bandwidth='1M'" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 8cb03a93a7a..7e914c4c539 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -15,7 +15,7 @@ $CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('backups', '$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh index 60888db0e2e..604cc9ff08e 100755 --- a/tests/queries/0_stateless/02724_limit_num_mutations.sh +++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh @@ -23,7 +23,7 @@ function wait_for_alter() done } -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_limit_mutations SYNC; CREATE TABLE t_limit_mutations (id UInt64, v UInt64) @@ -48,14 +48,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl SHOW CREATE TABLE t_limit_mutations; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " ALTER TABLE t_limit_mutations UPDATE v = 6 WHERE 1 SETTINGS number_of_mutations_to_throw = 100; ALTER TABLE t_limit_mutations MODIFY COLUMN v String SETTINGS number_of_mutations_to_throw = 100, alter_sync = 0; " wait_for_alter "String" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; @@ -65,7 +65,7 @@ ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_limit_mutations" wait_for_mutation "t_limit_mutations" "0000000003" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.sh b/tests/queries/0_stateless/02725_async_insert_table_setting.sh index 13911e8d677..14c2d335275 100755 --- a/tests/queries/0_stateless/02725_async_insert_table_setting.sh +++ 
b/tests/queries/0_stateless/02725_async_insert_table_setting.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_mt_async_insert; DROP TABLE IF EXISTS t_mt_sync_insert; @@ -19,7 +19,7 @@ url="${CLICKHOUSE_URL}&async_insert=0&wait_for_async_insert=1" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_async_insert VALUES (1, 'aa'), (2, 'bb')" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_sync_insert VALUES (1, 'aa'), (2, 'bb')" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT count() FROM t_mt_async_insert; SELECT count() FROM t_mt_sync_insert; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 97d644fa4d6..5d941adcb81 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -30,7 +30,9 @@ SELECT count() FROM t_async_inserts_flush; SYSTEM FLUSH ASYNC INSERT QUEUE; -SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush'; + SELECT count() FROM t_async_inserts_flush; DROP TABLE t_async_inserts_flush; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 876766d0780..61bbbd620f0 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -91,5 +91,5 @@ flush1 $TIMEOUT & wait ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts WHERE database = currentDatabase() AND table = 'async_inserts'" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"; diff --git a/tests/queries/0_stateless/02841_local_assert.sh b/tests/queries/0_stateless/02841_local_assert.sh index a167c09da1f..dc49007b0f6 100755 --- a/tests/queries/0_stateless/02841_local_assert.sh +++ b/tests/queries/0_stateless/02841_local_assert.sh @@ -7,12 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -nm +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -m echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -nm --ignore-error +insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -m --ignore-error echo "create table test (x UInt64) engine=Memory; insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS} -select 1" | $CLICKHOUSE_LOCAL -nm +select 1" | $CLICKHOUSE_LOCAL -m diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference deleted file mode 100644 index 02c41656a36..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql deleted file mode 100644 index c730aa7b4a7..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql +++ /dev/null @@ -1,70 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS tab SYNC; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET allow_suspicious_low_cardinality_types=1; -SET mutations_sync = 2; - -CREATE TABLE tab -( - a String, - b UInt64, - c Int64, - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE tab; - -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'Test statistics count_min:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min; -ALTER TABLE tab ADD STATISTICS c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - - -SELECT 'Test statistics multi-types:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT 
replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - -DROP TABLE IF EXISTS tab SYNC; - - -SELECT 'Test LowCardinality and Nullable data type:'; -DROP TABLE IF EXISTS tab2 SYNC; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE tab2 -( - a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min), - pk String, -) Engine = MergeTree() ORDER BY pk; - -select name from system.tables where name = 'tab2' and database = currentDatabase(); - -DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference index a7ff5caa0b0..0e453b0ee8a 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.reference +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -1,31 +1,6 @@ -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY 
tuple()\nSETTINGS index_granularity = 8192
+CREATE TABLE default.tab\n(\n    `f64` Float64 STATISTICS(tdigest, uniq),\n    `f64_tdigest` Float64 STATISTICS(tdigest),\n    `f32` Float32 STATISTICS(tdigest, uniq),\n    `s` String,\n    `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
+CREATE TABLE default.tab\n(\n    `f64` Float64,\n    `f64_tdigest` Float64 STATISTICS(tdigest),\n    `f32` Float32,\n    `s` String,\n    `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql
index fe612efe2ac..32b56a842b7 100644
--- a/tests/queries/0_stateless/02864_statistics_ddl.sql
+++ b/tests/queries/0_stateless/02864_statistics_ddl.sql
@@ -1,59 +1,195 @@
--- Tests that various DDL statements create/drop/materialize statistics
+-- Tags: no-fasttest
+-- no-fasttest: 'count_min' sketches need a 3rd party library
+
+-- Tests DDL statements which create / drop / materialize statistics
+
+SET mutations_sync = 1;
 
 DROP TABLE IF EXISTS tab;
 
+-- Error case: Can't create statistics when allow_experimental_statistics = 0
+CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
+
 SET allow_experimental_statistics = 1;
-SET allow_statistics_optimize = 1;
+
+-- Error case: Unknown statistics types are rejected
+CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
+
+-- Error case: The same statistics type can't exist more than once on a column
+CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY }
+
+SET allow_suspicious_low_cardinality_types = 1;
+
+-- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*)
+
+-- tdigest requires data_type.isValueRepresentedByInteger
+-- These types work:
+CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab;
+-- These types don't work:
+CREATE TABLE tab (col 
String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- uniq requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY 
tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- CREATE TABLE was easy, ALTER is more fun CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f32 Float32, + s String, + a Array(Float64) +) +Engine = MergeTree() +ORDER BY tuple(); +-- Error case: Unknown statistics types are rejected +-- (relevant for ADD and MODIFY) +ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +-- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported + +-- Error case: The same statistics type can't exist more than once on a column +-- (relevant for ADD and MODIFY) +-- Create the same statistics object twice +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab 
MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY }
+-- Create a statistics object which exists already
+ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op
+ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op
+
+-- Error case: Column does not exist
+-- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE)
+-- Note that the results are unfortunately quite inconsistent ...
+ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op
+ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op
+ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS }
+
+-- Error case: Column exists but has no statistics
+-- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE)
+-- Note that the results are unfortunately quite inconsistent ...
+ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op
+ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op
+ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS }
+
+-- We don't check systematically that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the
+-- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that
+-- works and one that doesn't work.
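+-- (Recall the table definition above: 'f64' is a plain Float64 column, which all three statistics types
+-- support, while 'a' is Array(Float64), which none of them supports.)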
+
+-- tdigest
+-- Works:
+ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64;
+ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64;
+-- Doesn't work:
+ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS }
+-- uniq
+-- Works:
+ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64;
+ALTER TABLE tab MODIFY STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64;
+-- Doesn't work:
+ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS }
+-- count_min
+-- Works:
+ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64;
+ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64;
+-- Doesn't work:
+ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS }
+ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS }
+
+-- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing
+-- statistics objects (e.g. tdigest can be created on Float64 and UInt64)
+ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN }
+
+-- Finally, do a full-circle test of a good case. Print table definition after each step.
+-- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested.
+SHOW CREATE TABLE tab;
+ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq;
+SHOW CREATE TABLE tab;
+ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq;
+SHOW CREATE TABLE tab;
+ALTER TABLE tab CLEAR STATISTICS f64, f32;
+SHOW CREATE TABLE tab;
+ALTER TABLE tab MATERIALIZE STATISTICS f64, f32;
+SHOW CREATE TABLE tab;
+ALTER TABLE tab DROP STATISTICS f64, f32;
 SHOW CREATE TABLE tab;
-INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;
-
-SELECT 'After insert';
-SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
-SELECT count(*) FROM tab WHERE b < 10 and a < 10;
-SELECT count(*) FROM tab WHERE b < NULL and a < '10';
-
-ALTER TABLE tab DROP STATISTICS a, b;
-
-SELECT 'After drop statistic';
-SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
-SELECT count(*) FROM tab WHERE b < 10 and a < 10;
-
-SHOW CREATE TABLE tab;
-
-ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest;
-
-SELECT 'After add statistic';
-
-SHOW CREATE TABLE tab;
-
-ALTER TABLE tab MATERIALIZE STATISTICS a, b;
-INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000;
-
-SELECT 'After materialize statistic';
-SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
-SELECT count(*) FROM tab WHERE b < 10 and a < 10;
-
-OPTIMIZE TABLE tab FINAL;
-
-SELECT 'After merge';
-SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
-SELECT count(*) FROM tab WHERE b < 10 and a < 10;
-
-ALTER TABLE tab RENAME COLUMN b TO c;
-SHOW CREATE TABLE tab;
-
-SELECT 'After rename';
-SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
-SELECT count(*) FROM tab WHERE c < 10 and a < 10;
-
-DROP TABLE IF EXISTS tab;
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference
new file mode 100644
index 00000000000..eb5e685597c
--- /dev/null
+++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference
@@ -0,0 +1,12 @@
+After insert
+  Prewhere info
+  Prewhere filter
+  Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+After merge
+  Prewhere info
+  Prewhere filter
+  Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
+After truncate, insert, and materialize
+  Prewhere info
+  Prewhere filter
+  Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql
new file mode 100644
index 00000000000..d469a4c2036
--- /dev/null
+++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql
@@ -0,0 +1,36 @@
+-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0').
+-- (The concrete statistics type, column data type and predicate type don't matter)
+
+-- Checks the predicate evaluation order in EXPLAIN. This is quite fragile; a better approach would be helpful (maybe 'send_logs_level'?)
+
+DROP TABLE IF EXISTS tab;
+
+SET allow_experimental_statistics = 1;
+SET allow_statistics_optimize = 1;
+SET enable_analyzer = 1;
+
+SET materialize_statistics_on_insert = 0;
+
+CREATE TABLE tab
+(
+    a Int64 STATISTICS(tdigest),
+    b Int16 STATISTICS(tdigest),
+) ENGINE = MergeTree() ORDER BY tuple()
+SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics.
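+
+-- (With materialize_statistics_on_insert = 0, the INSERT below writes parts without materialized statistics;
+-- they are built later, during the merge or an explicit MATERIALIZE STATISTICS.)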
+ +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +TRUNCATE TABLE tab; +SET mutations_sync = 2; +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +SELECT 'After truncate, insert, and materialize'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/queries/0_stateless/02864_statistics_exception.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql deleted file mode 100644 index 289ffee6600..00000000000 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ /dev/null @@ -1,55 +0,0 @@ --- Tests creating/dropping/materializing statistics produces the right exceptions. - -DROP TABLE IF EXISTS tab; - --- Can't create statistics when allow_experimental_statistics = 0 -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - -SET allow_experimental_statistics = 1; - --- The same type of statistics can't exist more than once on a column -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest, tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- Unknown statistics types are rejected -CREATE TABLE tab -( - a Float64 STATISTICS(no_statistics_type) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- tDigest statistics can only be created on numeric columns -CREATE TABLE tab -( - a String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - -CREATE TABLE tab -( - a Float64, - b String -) Engine = MergeTree() ORDER BY tuple(); - -ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; --- Statistics can be created only on integer columns -ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS a; -ALTER TABLE tab DROP STATISTICS IF EXISTS a; -ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; -ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } - -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError 
ALTER_OF_COLUMN_IS_FORBIDDEN } - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference deleted file mode 100644 index 5e969cf41cb..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference +++ /dev/null @@ -1,10 +0,0 @@ -10 -10 -10 -statistics not used Condition less(b, 10_UInt8) moved to PREWHERE -statistics not used Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE -2 0 diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql deleted file mode 100644 index 6606cff263f..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql +++ /dev/null @@ -1,52 +0,0 @@ --- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). - -DROP TABLE IF EXISTS tab; - -SET enable_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -SET materialize_statistics_on_insert = 0; - -CREATE TABLE tab -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE tab FINAL; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE tab; -SET mutations_sync = 2; - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE tab; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM tab%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO tab SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference new file mode 100644 index 00000000000..ffbd7269e05 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -0,0 +1,98 @@ +u64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +u64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +f64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +0 +0 +0 +0 +f64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +80 +80 +80 +80 +dt and = +0 +0 +0 +0 +10 +10 +10 +10 +dt and < 
+10000 +10000 +10000 +10000 +70 +70 +70 +70 +b and = +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +0 +0 +0 +0 +s and = +10 +10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql new file mode 100644 index 00000000000..779116cf19a --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -0,0 +1,214 @@ +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + u64 UInt64, + u64_tdigest UInt64 STATISTICS(tdigest), + u64_count_min UInt64 STATISTICS(count_min), + u64_uniq UInt64 STATISTICS(uniq), + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f64_count_min Float64 STATISTICS(count_min), + f64_uniq Float64 STATISTICS(uniq), + dt DateTime, + dt_tdigest DateTime STATISTICS(tdigest), + dt_count_min DateTime STATISTICS(count_min), + dt_uniq DateTime STATISTICS(uniq), + b Bool, + b_tdigest Bool STATISTICS(tdigest), + b_count_min Bool STATISTICS(count_min), + b_uniq Bool STATISTICS(uniq), + s String, + -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest + s_count_min String STATISTICS(count_min) + -- s_uniq String STATISTICS(uniq), -- not supported by uniq +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab +-- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; +SELECT number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 2, + number % 2, + number % 2, + number % 2, + toString(number % 1000), + toString(number % 1000) +FROM system.numbers LIMIT 10000; + +-- u64 ---------------------------------------------------- + +SELECT 'u64 and ='; + +SELECT count(*) FROM tab WHERE u64 = 7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7; +SELECT count(*) FROM tab WHERE u64_count_min = 7; +SELECT count(*) FROM tab WHERE u64_uniq = 7; + +SELECT count(*) FROM tab WHERE u64 = 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE u64_count_min = 7.7; +SELECT count(*) FROM tab WHERE u64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE u64 = '7'; +SELECT count(*) FROM tab WHERE u64_tdigest = '7'; +SELECT count(*) FROM tab WHERE u64_count_min = '7'; +SELECT count(*) FROM tab WHERE u64_uniq = '7'; + +SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } + +SELECT 'u64 and <'; + +SELECT count(*) FROM tab WHERE u64 < 7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7; +SELECT count(*) FROM tab WHERE u64_count_min < 7; +SELECT count(*) FROM tab WHERE u64_uniq < 7; + +SELECT count(*) FROM tab WHERE u64 < 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE u64_count_min < 7.7; +SELECT count(*) FROM tab WHERE u64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE u64 < '7'; +SELECT count(*) FROM tab WHERE u64_tdigest < '7'; 
+SELECT count(*) FROM tab WHERE u64_count_min < '7'; +SELECT count(*) FROM tab WHERE u64_uniq < '7'; + +SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } + +-- f64 ---------------------------------------------------- + +SELECT 'f64 and ='; + +SELECT count(*) FROM tab WHERE f64 = 7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7; +SELECT count(*) FROM tab WHERE f64_count_min = 7; +SELECT count(*) FROM tab WHERE f64_uniq = 7; + +SELECT count(*) FROM tab WHERE f64 = 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE f64_count_min = 7.7; +SELECT count(*) FROM tab WHERE f64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE f64 = '7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7'; + +SELECT count(*) FROM tab WHERE f64 = '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; + +SELECT 'f64 and <'; + +SELECT count(*) FROM tab WHERE f64 < 7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7; +SELECT count(*) FROM tab WHERE f64_count_min < 7; +SELECT count(*) FROM tab WHERE f64_uniq < 7; + +SELECT count(*) FROM tab WHERE f64 < 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE f64_count_min < 7.7; +SELECT count(*) FROM tab WHERE f64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE f64 < '7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7'; + +SELECT count(*) FROM tab WHERE f64 < '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; + +-- dt ---------------------------------------------------- + +SELECT 'dt and ='; + +SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt = 7; +SELECT count(*) FROM tab WHERE dt_tdigest = 7; +SELECT count(*) FROM tab WHERE dt_count_min = 7; +SELECT count(*) FROM tab WHERE dt_uniq = 7; + +SELECT 'dt and <'; + +SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt < 7; +SELECT count(*) FROM tab WHERE dt_tdigest < 7; +SELECT count(*) FROM tab WHERE dt_count_min < 7; +SELECT count(*) FROM tab WHERE dt_uniq < 7; + +-- b ---------------------------------------------------- + +SELECT 'b and ='; + +SELECT count(*) FROM tab WHERE b = true; +SELECT count(*) FROM tab WHERE b_tdigest = true; +SELECT count(*) FROM tab WHERE b_count_min = true; +SELECT count(*) FROM tab WHERE b_uniq = true; + +SELECT count(*) FROM tab WHERE b = 'true'; +SELECT count(*) FROM tab WHERE b_tdigest = 'true'; +SELECT count(*) FROM tab WHERE b_count_min = 
'true'; +SELECT count(*) FROM tab WHERE b_uniq = 'true'; + +SELECT count(*) FROM tab WHERE b = 1; +SELECT count(*) FROM tab WHERE b_tdigest = 1; +SELECT count(*) FROM tab WHERE b_count_min = 1; +SELECT count(*) FROM tab WHERE b_uniq = 1; + +SELECT count(*) FROM tab WHERE b = 1.1; +SELECT count(*) FROM tab WHERE b_tdigest = 1.1; +SELECT count(*) FROM tab WHERE b_count_min = 1.1; +SELECT count(*) FROM tab WHERE b_uniq = 1.1; + +-- s ---------------------------------------------------- + +SELECT 's and ='; + +SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported + +SELECT count(*) FROM tab WHERE s = '7'; +-- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = '7'; +-- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference deleted file mode 100644 index 77786dbdd8c..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.reference +++ /dev/null @@ -1,35 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After modify TDigest - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) -After drop - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql deleted file mode 100644 index 0f5f353c045..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ /dev/null @@ -1,73 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET mutations_sync = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Int64 STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; -INSERT INTO t1 select 0, 0, 11, 
generateUUIDv4(); - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT 'After modify TDigest'; -ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; -ALTER TABLE t1 MATERIALIZE STATISTICS c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - - -ALTER TABLE t1 DROP STATISTICS c; - -SELECT 'After drop'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE t2 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c LowCardinality(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t2; -DROP TABLE IF EXISTS t3; - -CREATE TABLE t3 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Nullable(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t3; - diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference new file mode 100644 index 00000000000..a9f669b88c1 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.reference @@ -0,0 +1,20 @@ 
+After insert
+    Prewhere info
+    Prewhere filter
+    Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
+After drop statistic
+    Prewhere info
+    Prewhere filter
+    Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+After add and materialize statistic
+    Prewhere info
+    Prewhere filter
+    Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
+After merge
+    Prewhere info
+    Prewhere filter
+    Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
+After rename
+    Prewhere info
+    Prewhere filter
+    Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed)
diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql
new file mode 100644
index 00000000000..4956bd27e87
--- /dev/null
+++ b/tests/queries/0_stateless/02864_statistics_usage.sql
@@ -0,0 +1,42 @@
+-- Test that the optimizer picks up column statistics
+-- (The concrete statistics type, column data type and predicate type don't matter)
+
+-- The check relies on the predicate evaluation order in the EXPLAIN output. This is quite fragile; a more robust approach would be helpful (maybe 'send_logs_level'?)
+
+SET allow_experimental_statistics = 1;
+SET allow_statistics_optimize = 1;
+SET mutations_sync = 1;
+SET enable_analyzer = 1;
+
+DROP TABLE IF EXISTS tab;
+
+CREATE TABLE tab
+(
+    a Float64 STATISTICS(tdigest),
+    b Int64 STATISTICS(tdigest)
+) Engine = MergeTree() ORDER BY tuple()
+SETTINGS min_bytes_for_wide_part = 0;
+
+INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000;
+SELECT 'After insert';
+SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
+
+ALTER TABLE tab DROP STATISTICS a, b;
+SELECT 'After drop statistic';
+SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used)
+
+ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest;
+ALTER TABLE tab MATERIALIZE STATISTICS a, b;
+INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000;
+SELECT 'After add and materialize statistic';
+SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
+
+OPTIMIZE TABLE tab FINAL;
+SELECT 'After merge';
+SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used)
+
+ALTER TABLE tab RENAME COLUMN b TO c;
+SELECT 'After rename';
+SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used)
+
+DROP TABLE IF EXISTS tab;
diff --git a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh
index 4bc29ce4233..be0ef4e2648 100755
--- a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh
+++ b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh
@@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck
source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-${CLICKHOUSE_CLIENT} -nm -q """
+${CLICKHOUSE_CLIENT} -m -q """
 DROP TABLE IF EXISTS with_lonely;
 
 CREATE TABLE with_lonely
@@ -23,7 +23,7 @@ ORDER BY (id);
 """
 
 create_optimize_partition() {
-    ${CLICKHOUSE_CLIENT} -nm -q """
+    ${CLICKHOUSE_CLIENT} -m -q """
         INSERT INTO with_lonely SELECT number, '$1', number*10, 0 FROM numbers(10);
         INSERT INTO with_lonely SELECT number+500000, '$1', number*10, 1 FROM numbers(10);
 """
@@ -39,7 +39,7 @@ create_optimize_partition "2022-10-29"
 create_optimize_partition "2022-10-30"
 create_optimize_partition "2022-10-31"
 
-${CLICKHOUSE_CLIENT} -nm -q """
+${CLICKHOUSE_CLIENT} -m -q """
 SYSTEM STOP MERGES with_lonely;
 
 INSERT INTO with_lonely SELECT number, '2022-11-01', number*10, 0 FROM numbers(10);
diff --git a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh
index cc4ce9b122e..418e439e44b 100755
--- a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh
+++ b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh
@@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # head by default prints 10 rows, but querying just 11 rows is not enough:
 # we need to overflow the default pipe size, hence 1 million rows (it
 # should be around 6 MiB in text representation, definitely enough).
-$CLICKHOUSE_CLIENT --ignore-error -nm --pager head -q "
+$CLICKHOUSE_CLIENT --ignore-error -m --pager head -q "
     select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR }
     select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR }
 "
diff --git a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh
index 3f2b732e71b..3a7d861262e 100755
--- a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh
+++ b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh
@@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "select 1; select 2;"
 $CLICKHOUSE_LOCAL -q "select 1; select 2;"
 
 # -n is a no-op
-$CLICKHOUSE_CLIENT -n -q "select 1; select 2;"
-$CLICKHOUSE_LOCAL -n -q "select 1; select 2;"
+$CLICKHOUSE_CLIENT -q "select 1; select 2;"
+$CLICKHOUSE_LOCAL -q "select 1; select 2;"
 
 exit 0
diff --git a/tests/queries/0_stateless/02875_merge_engine_set_index.sh b/tests/queries/0_stateless/02875_merge_engine_set_index.sh
index 355d83167a6..f40696c31a9 100755
--- a/tests/queries/0_stateless/02875_merge_engine_set_index.sh
+++ b/tests/queries/0_stateless/02875_merge_engine_set_index.sh
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT -nq "
+$CLICKHOUSE_CLIENT -q "
 CREATE TABLE t1
 (
     a UInt32,
@@ -57,7 +57,7 @@ ORDER BY b DESC
 FORMAT Null;"
 
-$CLICKHOUSE_CLIENT -nq "
+$CLICKHOUSE_CLIENT -q "
 SYSTEM FLUSH LOGS;
 
 SELECT ProfileEvents['SelectedMarks']
diff --git a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh
index 8bdaa47c111..dd08724456b 100755
--- a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh
+++ b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh
@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 .
"$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 42 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64) engine=Memory; insert into test (x) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); insert into test (y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); diff --git a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh index 315bbcd544f..c7270b65e19 100755 --- a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh +++ b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 1 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64 default 42) engine=Memory; insert into test select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); select * from test; diff --git a/tests/queries/0_stateless/02883_named_collections_override.sh b/tests/queries/0_stateless/02883_named_collections_override.sh index a08c795127d..915ce280226 100755 --- a/tests/queries/0_stateless/02883_named_collections_override.sh +++ b/tests/queries/0_stateless/02883_named_collections_override.sh @@ -8,7 +8,7 @@ u1="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection1" u2="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection2" u3="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection3" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP NAMED COLLECTION IF EXISTS $u1; DROP NAMED COLLECTION IF EXISTS $u2; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh index 7f583087336..791515c82d6 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_1; CREATE TABLE t_async_insert_native_1 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -22,7 +22,7 @@ echo '{"id": 1, "s": "aaa"}' \ | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_1 FORMAT JSONEachRow {"id": 2, "s": "bbb"}' 2>&1 \ | grep -o "NOT_IMPLEMENTED" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT sum(length(entries.bytes)) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_1'; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh index b9b1854eaef..a8a9209ee68 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_2; CREATE TABLE t_async_insert_native_2 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -18,7 +18,7 @@ echo "(3, 'ccc') (4, 'ddd') (5, 'eee')" | $CLICKHOUSE_CLIENT $async_insert_optio wait -$CLICKHOUSE_CLIENT -n -q " 
+$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_2 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh index c9d399607d0..229f13eb821 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_3; CREATE TABLE t_async_insert_native_3 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -21,7 +21,7 @@ $CLICKHOUSE_CLIENT $async_insert_options -q "INSERT INTO t_async_insert_native_3 wait -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT format, length(entries.bytes) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_3' ORDER BY format; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh index 9118c11315c..e84c1ca8899 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_4; CREATE TABLE t_async_insert_native_4 (id UInt64) ENGINE = MergeTree ORDER BY id; " @@ -20,7 +20,7 @@ echo "(2) (3) (4) (5)" | $CLICKHOUSE_CLIENT_WITH_LOG $async_insert_options --asy -q 'INSERT INTO t_async_insert_native_4 FORMAT Values' 2>&1 \ | grep -c "too much data" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_4 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh index 2917ec86957..065658d4d56 100755 --- a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh +++ b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', a $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Alias y, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Materialized 42, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64 Ephemeral, y UInt64 default x + 1) engine=Memory; insert into test (x, y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl'); select * from test; diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index a364e447062..74000bc298f 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -9,7 +9,7 @@ mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -${CLICKHOUSE_CLIENT} -n -q --ignore-error " +${CLICKHOUSE_CLIENT} -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; CREATE DATABASE IF NOT EXISTS 
npy_output_02895;
diff --git a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh
index d6775927f35..b4656c9e321 100755
--- a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh
+++ b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh
@@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -m -q "
 DROP TABLE IF EXISTS data;
 DROP TABLE IF EXISTS data2;
 DROP VIEW IF EXISTS mv1;
diff --git a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh
index 64f5dd1a987..ef631d9ed1b 100755
--- a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh
+++ b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh
@@ -5,13 +5,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh
 
 # setting disabled and no order by or primary key; expect error
-$CLICKHOUSE_CLIENT -n --query="
+$CLICKHOUSE_CLIENT --query="
     DROP TABLE IF EXISTS test_empty_order_by;
     CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192;
 " 2>&1 \
     | grep -F -q "You must provide an ORDER BY or PRIMARY KEY expression in the table definition." && echo 'OK' || echo 'FAIL'
 
 # setting disabled and primary key in table definition
-$CLICKHOUSE_CLIENT -n --query="
+$CLICKHOUSE_CLIENT --query="
    DROP TABLE IF EXISTS test_empty_order_by;
    CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() PRIMARY KEY a SETTINGS index_granularity = 8192;
    SHOW CREATE TABLE test_empty_order_by;
diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
index 5065da371a8..b77e5b0b402 100755
--- a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
+++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
@@ -15,7 +15,7 @@ if [[ $($CLICKHOUSE_CLIENT -q "select count()>0 from system.clusters where clust
     cluster=test_cluster_database_replicated
 fi
 
-$CLICKHOUSE_CLIENT -nm --distributed_ddl_output_mode=none -q "
+$CLICKHOUSE_CLIENT -m --distributed_ddl_output_mode=none -q "
     drop table if exists rmt1;
     drop table if exists rmt2;
@@ -46,7 +46,7 @@ part_name='%'
 
 # wait until there is at least one 'No active replica has part all_0_1_1 or covering part' message in the logs
 for _ in {0..50}; do
-    no_active_repilica_messages=$($CLICKHOUSE_CLIENT -nm -q "
+    no_active_repilica_messages=$($CLICKHOUSE_CLIENT -m -q "
        system flush logs;
 
        select count()
@@ -65,7 +65,7 @@ for _ in {0..50}; do
     sleep 1
 done
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -m -q "
    system start pulling replication log rmt2;
    system flush logs;
diff --git a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh
index 7ac9b488be5..5f9dc6ea077 100755
--- a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh
+++ b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh
@@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 .
"$CUR_DIR"/../shell_config.sh # setting enabled and no order by or primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY tuple()" && echo 'OK' || echo 'FAIL' # setting enabled and per-column primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8 PRIMARY KEY, b String PRIMARY KEY) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -21,7 +21,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY (a, b)" && echo 'OK' || echo 'FAIL' # setting enabled and primary key in table definition (not per-column or order by) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() PRIMARY KEY (a) SETTINGS index_granularity = 8192; @@ -29,7 +29,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY a" && echo 'OK' || echo 'FAIL' # setting enabled and order by in table definition (no primary key) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS index_granularity = 8192; diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh index 30ec50fa20f..e37f1e51c74 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,14 +15,14 @@ create materialized view mv (a Int32) engine = MergeTree() order by tuple() as s uuid=$(${CLICKHOUSE_CLIENT} --query "select uuid from system.tables where table='mv' and database == currentDatabase()") inner_table=".inner_id.${uuid}" -${CLICKHOUSE_CLIENT} -nm --query "drop table \`$inner_table\` sync" +${CLICKHOUSE_CLIENT} -m --query "drop table \`$inner_table\` sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " set send_logs_level = 'error'; backup table ${CLICKHOUSE_DATABASE}.\`mv\` to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; restore table ${CLICKHOUSE_DATABASE}.\`mv\` from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh index d59ebe400ee..f950954941f 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,18 +15,18 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -37,13 +37,13 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; drop table dst; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh index 8ed36a7edd7..dc5793d1638 100755 --- a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh +++ b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set data_type_default_nullable = 0; create table test (test String) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set data_type_default_nullable = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh index 742d24a97eb..eae307add10 100755 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set flatten_nested = 0; create table test (test Array(Tuple(foo String, bar Float64))) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test2; set flatten_nested = 0; create table test2 (test Nested(foo String, bar Float64)) ENGINE = MergeTree() ORDER BY tuple(); @@ -22,7 +22,7 @@ backup table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost to Dis ${CLICKHOUSE_CLIENT} --query "show create table test2" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); @@ -30,7 +30,7 @@ restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from D ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test2 sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh index 57182050534..2cad15c6fcb 100755 --- a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh +++ b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP DICTIONARY IF EXISTS 02907_dictionary; DROP TABLE IF EXISTS 02907_table; diff --git a/tests/queries/0_stateless/02907_system_backups_profile_events.sh b/tests/queries/0_stateless/02907_system_backups_profile_events.sh index 801056a2844..9a1d5a3db11 100755 --- a/tests/queries/0_stateless/02907_system_backups_profile_events.sh +++ b/tests/queries/0_stateless/02907_system_backups_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple(); " @@ -12,10 +12,10 @@ create table test (a Int32) engine = MergeTree() order by tuple(); backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME} backup_name="Disk('backups', '$backup_id')"; -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " backup table ${CLICKHOUSE_DATABASE}.test to $backup_name; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select ProfileEvents['BackupEntriesCollectorMicroseconds'] > 10 from system.backups where name='Disk(\'backups\', \'$backup_id\')' " diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.sh b/tests/queries/0_stateless/02908_Npy_files_caching.sh index 4845f740972..218e13efb95 100755 --- a/tests/queries/0_stateless/02908_Npy_files_caching.sh +++ b/tests/queries/0_stateless/02908_Npy_files_caching.sh @@ -7,13 +7,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=1" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy', auto, 'array Int64') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/one_dim.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/npy_big.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " diff --git a/tests/queries/0_stateless/02908_table_ttl_dependency.sh b/tests/queries/0_stateless/02908_table_ttl_dependency.sh index 70136b4a42b..0bc02426f61 100755 --- a/tests/queries/0_stateless/02908_table_ttl_dependency.sh +++ b/tests/queries/0_stateless/02908_table_ttl_dependency.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS 02908_dependent; DROP TABLE IF EXISTS 02908_main; @@ -14,11 +14,11 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE 02908_dependent (a UInt32, ts DateTime) ENGINE = MergeTree ORDER BY a TTL ts + 1 WHERE a IN (SELECT a FROM ${CLICKHOUSE_DATABASE}.02908_main); " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_main; " 2>&1 | grep -F -q "HAVE_DEPENDENT_OBJECTS" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_dependent; DROP TABLE 02908_main; " diff --git a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh index 8da144f90ca..75d491642ea 100755 --- a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh +++ b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=0; SELECT count() from system.schema_inference_cache where format = 'JSON' and additional_format_info like '%schema_inference_make_columns_nullable%';" diff --git a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh index 80e38338751..7ad38e11e96 100755 --- a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh +++ b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " CREATE TABLE IF NOT EXISTS ts_data_double_raw ( device_id UInt32 NOT NULL CODEC(ZSTD), diff --git a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh index 5f0f41a956b..b6d6ca57768 100755 --- a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh +++ b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh @@ -13,40 +13,40 @@ b_backup="Disk('backups', '$b_backup_id')" c_backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_c c_backup="Disk('backups', '$c_backup_id')" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl1; DROP TABLE IF EXISTS tbl2; DROP TABLE IF EXISTS tbl3; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl1 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must write backup 'a'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $a_backup SETTINGS id='$a_backup_id'; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl2 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read backup 'a' and write backup 'b'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $b_backup SETTINGS id='$b_backup_id', base_backup=$a_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl3 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read only backup 'b' (and not 'a') and write backup 'c'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $c_backup SETTINGS id='$c_backup_id', base_backup=$b_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl1; DROP TABLE tbl2; DROP TABLE tbl3; @@ -57,28 +57,28 @@ r2_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r2 r3_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r3 # The following RESTORE command must read all 3 backups 'a', 'b', c' because the table 'tbl1' was in the first backup. 
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl1 FROM $c_backup SETTINGS id='$r1_restore_id';
 " | grep -o "RESTORED"
 
 # The following RESTORE command must read only 2 backups 'b', 'c' (and not 'a') because the table 'tbl2' was in the second backup.
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl2 FROM $c_backup SETTINGS id='$r2_restore_id';
 " | grep -o "RESTORED"
 
 # The following RESTORE command must read only 1 backup 'c' (and not 'a' or 'b') because the table 'tbl3' was in the third backup.
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl3 FROM $c_backup SETTINGS id='$r3_restore_id';
 " | grep -o "RESTORED"
 
 all_ids="['$a_backup_id', '$b_backup_id', '$c_backup_id', '$r1_restore_id', '$r2_restore_id', '$r3_restore_id']"
 id_prefix_len=`expr "${CLICKHOUSE_TEST_UNIQUE_NAME}_" : '.*'`
 
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 SELECT substr(id, 1 + $id_prefix_len) as short_id, ProfileEvents['BackupsOpenedForRead'], ProfileEvents['BackupsOpenedForWrite'] FROM system.backups WHERE id IN ${all_ids} ORDER BY short_id
 "
 
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 DROP TABLE tbl1;
 DROP TABLE tbl2;
 DROP TABLE tbl3;
diff --git a/tests/queries/0_stateless/02916_dictionary_access.sh b/tests/queries/0_stateless/02916_dictionary_access.sh
index 08ee517ab3b..be62cc027ef 100755
--- a/tests/queries/0_stateless/02916_dictionary_access.sh
+++ b/tests/queries/0_stateless/02916_dictionary_access.sh
@@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}"
 dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}"
 
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 CREATE DICTIONARY IF NOT EXISTS ${dictname}
 (
     id UInt64,
@@ -23,15 +23,15 @@ ${CLICKHOUSE_CLIENT} -nm --query "
     SELECT dictGet(${dictname}, 'value', 1);
 "
 
-$CLICKHOUSE_CLIENT -nm --user="${username}" --query "
+$CLICKHOUSE_CLIENT -m --user="${username}" --query "
 SELECT * FROM dictionary(${dictname});
 " 2>&1 | grep -o ACCESS_DENIED | uniq
 
-$CLICKHOUSE_CLIENT -nm --user="${username}" --query "
+$CLICKHOUSE_CLIENT -m --user="${username}" --query "
 SELECT dictGet(${dictname}, 'value', 1);
 " 2>&1 | grep -o ACCESS_DENIED | uniq
 
-${CLICKHOUSE_CLIENT} -nm --query "
+${CLICKHOUSE_CLIENT} -m --query "
 DROP DICTIONARY IF EXISTS ${dictname};
 DROP USER IF EXISTS ${username};
 "
diff --git a/tests/queries/0_stateless/02916_joinget_dependency.sh b/tests/queries/0_stateless/02916_joinget_dependency.sh
index 6477ae8c967..ff9332cb57f 100755
--- a/tests/queries/0_stateless/02916_joinget_dependency.sh
+++ b/tests/queries/0_stateless/02916_joinget_dependency.sh
@@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 # We test the dependency check on DROP
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -m -q "
 DROP TABLE IF EXISTS Sub_distributed;
 DROP TABLE IF EXISTS Sub;
 DROP TABLE IF EXISTS Mapping;
@@ -20,8 +20,8 @@ $CLICKHOUSE_CLIENT -q "
 DROP TABLE Mapping;
 " 2>&1 | grep -cm1 "HAVE_DEPENDENT_OBJECTS"
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -m -q "
 DROP TABLE Sub_distributed;
 DROP TABLE Sub;
 DROP TABLE Mapping;
-"
\ No newline at end of file
+"
diff --git a/tests/queries/0_stateless/02930_client_file_log_comment.sh b/tests/queries/0_stateless/02930_client_file_log_comment.sh
index 50cd587e4b5..393bffcaf59 100755
---
a/tests/queries/0_stateless/02930_client_file_log_comment.sh +++ b/tests/queries/0_stateless/02930_client_file_log_comment.sh @@ -14,7 +14,7 @@ echo -n 'select 4242' >> "$file2" $CLICKHOUSE_CLIENT --queries-file "$file1" "$file2" <<<'select 42' $CLICKHOUSE_CLIENT --log_comment foo --queries-file /dev/stdin <<<'select 424242' -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 42' and type != 'QueryStart'; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 4242' and type != 'QueryStart'; diff --git a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh index d9e4a2c8f8b..76ab56a4570 100755 --- a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh +++ b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " create table test (x UInt64, y UInt32, size UInt64) engine=Memory; insert into test select c1, c2, _size from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1; select * from test; diff --git a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh index 0e23bb6c42b..9d1faf301d3 100755 --- a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh +++ b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh @@ -9,12 +9,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # no message at all echo "thread = 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' # send messages to some threads echo "thread != 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' # there is no thread with comm="foo", so no signals will be sent echo "thread_name = 'foo'" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh index 
27950866e81..44af2dbf26f 100755
--- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
@@ -26,7 +26,7 @@ function wait_part()
 function restore_failpoints()
 {
     # un-fail the entry via the failpoint (to avoid endless errors in the logs)
-    $CLICKHOUSE_CLIENT -nm -q "
+    $CLICKHOUSE_CLIENT -m -q "
         system enable failpoint replicated_queue_unfail_entries;
         system sync replica $failed_replica;
         system disable failpoint replicated_queue_unfail_entries;
@@ -34,7 +34,7 @@ function restore_failpoints()
 }
 trap restore_failpoints EXIT
 
-$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
+$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q "
    drop table if exists data_r1;
    drop table if exists data_r2;
@@ -45,7 +45,7 @@ $CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
 "
 
 # will fail ALTER_METADATA on one of the replicas
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -m -q "
    system enable failpoint replicated_queue_fail_next_entry;
    alter table data_r1 drop index value_idx settings alter_sync=0; -- part all_0_0_0_1
@@ -80,7 +80,7 @@ fi
 
 # This will create MERGE_PARTS; on the failed replica the part will be fetched from the source replica (since it does not have all the parts needed to execute the merge)
 $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1
 
-$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
+$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q "
    insert into $success_replica (key) values (2); -- part all_2_2_0
    -- Avoid 'Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet'
    system sync replica $success_replica pull;
diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh
index 6cb184cb1fe..ec699d974d4 100755
--- a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh
+++ b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh
@@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 user_name="${CLICKHOUSE_DATABASE}_test_user_02947"
 
-$CLICKHOUSE_CLIENT -n -q "
+$CLICKHOUSE_CLIENT -q "
 DROP TABLE IF EXISTS t_merge_tree_index;
 DROP USER IF EXISTS $user_name;
@@ -44,7 +44,7 @@ $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.size
 $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK"
 $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK"
 
-$CLICKHOUSE_CLIENT -n -q "
+$CLICKHOUSE_CLIENT -q "
 DROP TABLE IF EXISTS t_merge_tree_index;
 DROP USER IF EXISTS $user_name;
 "
diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh
index daa9c571a5d..d06aba8a4b6 100755
--- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh
+++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh
@@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 .
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE source_table ( id UInt64, diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index 7f690a681c4..737d5c6b41e 100755 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # CREATE TABLE local (x UInt8) Engine=Memory; # CREATE TABLE distributed ON CLUSTER cluster (p Date, i Int32) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), x) -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS local; DROP TABLE IF EXISTS distributed; CREATE TABLE local (x UInt8) Engine=Memory; diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.sh b/tests/queries/0_stateless/02974_backup_query_format_null.sh index ddba2f6de16..345a4f47b20 100755 --- a/tests/queries/0_stateless/02974_backup_query_format_null.sh +++ b/tests/queries/0_stateless/02974_backup_query_format_null.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl; CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO tbl VALUES (2), (80), (-12345); @@ -14,7 +14,7 @@ backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" ${CLICKHOUSE_CLIENT} --query "BACKUP TABLE tbl TO ${backup_name} FORMAT Null" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl; RESTORE ALL FROM ${backup_name} FORMAT Null " diff --git a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh index e65c9654c9c..7ad5a2179f9 100755 --- a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh +++ b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq """ +${CLICKHOUSE_CLIENT} -q """ CREATE TABLE t1_local ( n UInt64, diff --git a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh index e9f59138177..07b99eb4e99 100755 --- a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh +++ b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh @@ -35,7 +35,7 @@ THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ --insert-method $insert_method \ --table-engine $engine \ @@ -48,7 +48,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'insert_several_blocks_into_table OK' || echo "FAIL: insert_several_blocks_into_table ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ --insert-method $insert_method \ --table-engine $engine \ @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'mv_generates_several_blocks OK' || echo "FAIL: mv_generates_several_blocks ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ --insert-method $insert_method \ --table-engine $engine \ diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index d51e180efc9..e61f9061297 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "drop table if exists 03008_test_local_mt sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " create table 03008_test_local_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) settings disk = disk( @@ -19,35 +19,35 @@ settings disk = disk( path = '/var/lib/clickhouse/disks/local_plain_rewritable/') " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " insert into 03008_test_local_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); insert into 03008_test_local_mt (*) select number, number, number from numbers_mt(10000); " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; " ${CLICKHOUSE_CLIENT} --query "optimize table 03008_test_local_mt final;" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt modify setting disk = '03008_local_plain_rewritable', old_parts_lifetime = 3600; select engine_full from system.tables WHERE database = currentDatabase() AND name = '03008_test_local_mt'; " | grep -c "old_parts_lifetime = 3600" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 
10; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt update c = 0 where a % 2 = 1; alter table 03008_test_local_mt add column d Int64 after c; alter table 03008_test_local_mt drop column c; " 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " truncate table 03008_test_local_mt; select count(*) from 03008_test_local_mt; " diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh index 6f59e9b9703..e2f9cf48108 100755 --- a/tests/queries/0_stateless/03031_clickhouse_local_input.sh +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -6,15 +6,15 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" echo '# foo' -$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << "$INPUT_FILE" -$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -nm -q " +$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -m -q " select * from t; select * from t; " diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.sh b/tests/queries/0_stateless/03143_prewhere_profile_events.sh index 00daa0fe7cc..6a6b993e5f8 100755 --- a/tests/queries/0_stateless/03143_prewhere_profile_events.sh +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS t; CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=0; @@ -25,7 +25,7 @@ client_opts=( --max_threads 8 ) -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 @@ -33,7 +33,7 @@ PREWHERE (b % 8192) = 42 FORMAT Null " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -42,7 +42,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 @@ -51,7 +51,7 @@ PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 settings enable_multiple_prewhere_read_steps=0; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -q " SELECT b, c FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -59,7 +59,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; -- 52503 which is 43 * number of granules, 10000000 diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index 95aef9bbc5b..4e7b318e202 100755 --- 
a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists tp_1; create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings max_parts_to_merge_at_once=1; insert into tp_1 select number, number from numbers(3); @@ -25,7 +25,7 @@ alter table tp_1 drop projection pp; alter table tp_1 attach partition '0'; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" @@ -34,19 +34,19 @@ $CLICKHOUSE_CLIENT -q " backup table tp_1 to Disk('backups', '$backup_id'); " | grep -o "BACKUP_CREATED" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1" diff --git a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh index b6980b3a23a..f33a6b3ef27 100755 --- a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh +++ b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy ( timestamp_interval DateTime CODEC(DoubleDelta), @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} -nq " INSERT INTO event_envoy SELECT now() - number, 'us-east-1', 'ch_super_fast' FROM numbers_mt(1e5); " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy_remote ( timestamp_interval DateTime CODEC(DoubleDelta), diff --git a/tests/queries/0_stateless/03156_default_multiquery_split.sh b/tests/queries/0_stateless/03156_default_multiquery_split.sh index 8ba2f46b786..d849fb5a162 100755 --- a/tests/queries/0_stateless/03156_default_multiquery_split.sh +++ b/tests/queries/0_stateless/03156_default_multiquery_split.sh @@ -53,6 +53,6 @@ SELECT * FROM TEST2 ORDER BY value; DROP TABLE TEST1; DROP TABLE TEST2; EOF -$CLICKHOUSE_CLIENT -m -n < "$SQL_FILE_NAME" +$CLICKHOUSE_CLIENT -m < "$SQL_FILE_NAME" rm "$SQL_FILE_NAME" diff --git a/tests/queries/0_stateless/03169_time_virtual_column.sh b/tests/queries/0_stateless/03169_time_virtual_column.sh index fef1de8c6f2..b289f39accb 100755 --- a/tests/queries/0_stateless/03169_time_virtual_column.sh +++ b/tests/queries/0_stateless/03169_time_virtual_column.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv sleep 1 -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " select _size, (dateDiff('millisecond', _time, now()) < 600000 AND dateDiff('millisecond', _time, now()) > 0) from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv'); " rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index 7c567c0f58f..af702569794 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh index ce53f467823..583257d8fd3 100755 --- a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -5,25 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh index 4f217935123..c759cc34425 100755 --- a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " DROP TABLE IF EXISTS t_unload_primary_key; CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) @@ -26,7 +26,7 @@ for _ in {1..100}; do sleep 0.3 done -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; DROP TABLE IF EXISTS t_unload_primary_key; " diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.sh b/tests/queries/0_stateless/03199_dictionary_table_access.sh index 952b466b5da..14f017c7fbc 100755 --- a/tests/queries/0_stateless/03199_dictionary_table_access.sh +++ b/tests/queries/0_stateless/03199_dictionary_table_access.sh @@ -8,7 +8,7 @@ username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} ( id UInt64, @@ -26,15 +26,15 @@ ${CLICKHOUSE_CLIENT} -nm --query " SELECT * FROM ${dicttablename}; " -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dictname}; " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dicttablename}; " 2>&1 | grep -o ACCESS_DENIED | uniq -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS ${dicttablename} SYNC; DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference new file mode 100644 index 00000000000..a4a2e48e046 --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -0,0 +1,120 @@ +TESTING THE FILE HIVE PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +42 2020-01-01 +[1,2,3] 42.42 +Array(Int64) LowCardinality(Float64) +101 +2070 +4081 +2070 +2070 +1 +1 +TESTING THE URL PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno 
Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +1 +TESTING THE S3 PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +OK +TESTING THE S3CLUSTER PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh new file mode 100755 index 00000000000..db1f073d736 --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
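+
+# The checks below verify that key=value components of a hive-style path are
+# exposed as virtual columns (_column0, _column1, ...) by the file(), url(),
+# s3() and s3Cluster() table functions once use_hive_partitioning is enabled.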
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; + +SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; +SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; +""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "INCORRECT_DATA" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 0; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM 
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "INCORRECT_DATA"
+
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 0;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER"
+
+
+$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'"
+
+
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
+
+SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+
+SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+
+SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
+
+SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+
+SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;"""
+
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 0;
+
+SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER"
+
+
+$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'"
+
+
+$CLICKHOUSE_CLIENT -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
+
+SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+
+SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+
+SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
+
+SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+
+SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0;
+"""
+
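+# Same negative check for s3(): without use_hive_partitioning the virtual
+# column must not resolve, hence the expected UNKNOWN_IDENTIFIER (echoed as OK).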
+$CLICKHOUSE_CLIENT -n -q """
+set use_hive_partitioning = 0;
+
+SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -F -q "UNKNOWN_IDENTIFIER" && echo "OK" || echo "FAIL";
+
+$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'"
+
+$CLICKHOUSE_CLIENT -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
+
+SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
+
+SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
+
+SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
+"""
diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.reference b/tests/queries/0_stateless/03210_dynamic_squashing.reference
new file mode 100644
index 00000000000..4f5b5ba098c
--- /dev/null
+++ b/tests/queries/0_stateless/03210_dynamic_squashing.reference
@@ -0,0 +1,8 @@
+Array(UInt8)
+None
+UInt64
+None
+String
+UInt64
+String
+UInt64
diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql
new file mode 100644
index 00000000000..23b47184e33
--- /dev/null
+++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql
@@ -0,0 +1,20 @@
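+-- Squashing differently-typed blocks must respect the column's max_types limit:
+-- once the limit is exceeded, further values are kept in the shared fallback
+-- representation (visible as String in the expected results) instead of
+-- extending the set of stored types.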
+set allow_experimental_dynamic_type = 1;
+set max_block_size = 1000;
+
+drop table if exists test;
+
+create table test (d Dynamic) engine=MergeTree order by tuple();
+insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
+select distinct dynamicType(d) as type from test order by type;
+
+drop table test;
+create table test (d Dynamic(max_types=2)) engine=MergeTree order by tuple();
+insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
+select distinct dynamicType(d) as type from test order by type;
+
+truncate table test;
+insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
+select distinct dynamicType(d) as type from test order by type;
+
+drop table test;
+
diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh
index 8a77538f592..adbb0cb6de0 100755
--- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh
+++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh
@@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh

 echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json
-$CLICKHOUSE_LOCAL -nm -q "
+$CLICKHOUSE_LOCAL -m -q "
 DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS input_format_max_bytes_to_read_for_schema_inference=1000;
 SELECT additional_format_info from system.schema_inference_cache"
diff --git a/tests/queries/0_stateless/03215_grant_current_grants.reference b/tests/queries/0_stateless/03215_grant_current_grants.reference
new file mode 100644
index 00000000000..e4f6850b806
--- /dev/null
+++ b/tests/queries/0_stateless/03215_grant_current_grants.reference
@@ -0,0 +1,2 @@
+GRANT SELECT, CREATE TABLE, CREATE VIEW ON default.*
+GRANT SELECT ON default.*
diff --git a/tests/queries/0_stateless/03215_grant_current_grants.sh b/tests/queries/0_stateless/03215_grant_current_grants.sh
new file mode 100755
index 00000000000..68af4a62bba
--- /dev/null
+++ b/tests/queries/0_stateless/03215_grant_current_grants.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+user1="user03215_1_${CLICKHOUSE_DATABASE}_$RANDOM"
+user2="user03215_2_${CLICKHOUSE_DATABASE}_$RANDOM"
+user3="user03215_3_${CLICKHOUSE_DATABASE}_$RANDOM"
+db=${CLICKHOUSE_DATABASE}
+
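+# GRANT CURRENT GRANTS passes on the privileges the grantor itself holds with
+# GRANT OPTION: user2 should receive SELECT, CREATE TABLE and CREATE VIEW from
+# user1, user3 inherits them transitively via user2, and the
+# GRANT CURRENT GRANTS(SELECT ...) form narrows the transfer to SELECT only.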
+
+${CLICKHOUSE_CLIENT} --query "CREATE USER $user1, $user2, $user3;";
+${CLICKHOUSE_CLIENT} --query "GRANT SELECT, CREATE TABLE, CREATE VIEW ON $db.* TO $user1 WITH GRANT OPTION;";
+
+${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user2" --user $user1;
+${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user3" --user $user2;
+
+${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user2" | sed 's/ TO.*//';
+${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//';
+
+${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS(SELECT ON $db.*) TO $user3" --user $user1;
+${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//';
+
+${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3";
diff --git a/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv
new file mode 100644
index 00000000000..936d995cc64
--- /dev/null
+++ b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv
@@ -0,0 +1,5 @@
+_login_email,_identifier,_first_name,_last_name
+laura@example.com,2070,Laura,Grey
+craig@example.com,4081,Craig,Johnson
+mary@example.com,9346,Mary,Jenkins
+jamie@example.com,5079,Jamie,Smith
diff --git a/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet differ
diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet
new file mode 100644
index 00000000000..9b6a78cf8cc
Binary files /dev/null and b/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet differ
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index ffd9fae7f45..ce7c666912e 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1702,6 +1702,8 @@ getOSKernelVersion
 getServerPort
 getSetting
 getSizeOfEnumType
+getSubcolumn
+getTypeSerializationStreams
 getblockinfo
 getevents
 ghcnd
@@ -2115,6 +2117,7 @@ namenode
 namepassword
 nameprofile
 namequota
+namespace
 namespaces
 natively
 nats