diff --git a/.gitignore b/.gitignore index a04c60d5ca3..39d6f3f9fc8 100644 --- a/.gitignore +++ b/.gitignore @@ -161,6 +161,7 @@ tests/queries/0_stateless/test_* tests/queries/0_stateless/*.binary tests/queries/0_stateless/*.generated-expect tests/queries/0_stateless/*.expect.history +tests/integration/**/_gen # rust /rust/**/target diff --git a/.gitmodules b/.gitmodules index 151dc28c55b..ba71a8ae3a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -258,9 +258,6 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash -[submodule "contrib/hashidsxx"] - path = contrib/hashidsxx - url = https://github.com/schoentoon/hashidsxx [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/ClickHouse/nats.c diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index 83232669c04..fda94edaa88 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end) /// See https://github.com/boostorg/algorithm/issues/63 /// And https://bugs.llvm.org/show_bug.cgi?id=41141 template -inline void splitInto(To & to, const std::string & what, bool token_compress = false) +inline To & splitInto(To & to, std::string_view what, bool token_compress = false) { const char * pos = what.data(); const char * end = pos + what.size(); @@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = f else pos = delimiter_or_end; } + + return to; } diff --git a/base/base/move_extend.h b/base/base/move_extend.h new file mode 100644 index 00000000000..6e5b16e037c --- /dev/null +++ b/base/base/move_extend.h @@ -0,0 +1,9 @@ +#pragma once + +/// Extend @p to by moving elements from @p from to @p to end +/// @return @p to iterator to first of moved elements. +template +typename To::iterator moveExtend(To & to, From && from) +{ + return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end())); +} diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index d495d662f75..167a06eb7ff 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -306,7 +306,7 @@ namespace Net DEFAULT_KEEP_ALIVE_TIMEOUT = 8 }; - void reconnect(); + virtual void reconnect(); /// Connects the underlying socket to the HTTP server. int write(const char * buffer, std::streamsize length); diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2af468970f1..0f68c0cbc7c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,7 +164,6 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) -add_contrib (hashidsxx-cmake hashidsxx) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) diff --git a/contrib/NuRaft b/contrib/NuRaft index 491eaf592d9..eb1572129c7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf +Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96 diff --git a/contrib/hashidsxx b/contrib/hashidsxx deleted file mode 160000 index 783f6911ccf..00000000000 --- a/contrib/hashidsxx +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt deleted file mode 100644 index 17f3888bd94..00000000000 --- a/contrib/hashidsxx-cmake/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") - -set (SRCS - "${LIBRARY_DIR}/hashids.cpp" -) - -set (HDRS - "${LIBRARY_DIR}/hashids.h" -) - -add_library(_hashidsxx ${SRCS} ${HDRS}) -target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}") - -add_library(ch_contrib::hashidsxx ALIAS _hashidsxx) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 828c73e6781..e25b5fdbfed 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -141,7 +141,6 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash - contrib/hashidsxx contrib/c-ares contrib/morton-nd contrib/xxHash diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 0d1fa00b214..8e95d94b6dc 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -135,4 +135,5 @@ ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "pytest $PYTEST_OPTS"] +# To pass additional arguments (i.e. list of tests) use PYTEST_ADDOPTS +CMD ["sh", "-c", "pytest"] diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 7e34f4c114d..30d1b0bed3f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -4,6 +4,8 @@ services: kafka_zookeeper: image: zookeeper:3.4.9 hostname: kafka_zookeeper + ports: + - 2181:2181 environment: ZOO_MY_ID: 1 ZOO_PORT: 2181 @@ -15,15 +17,14 @@ services: image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081} + - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 KAFKA_ADVERTISED_HOST_NAME: kafka1 - KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: @@ -35,13 +36,38 @@ services: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT} environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT} + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth depends_on: - kafka_zookeeper - kafka1 + restart: always + security_opt: + - label:disable + + schema-registry-auth: + image: confluentinc/cp-schema-registry:5.2.0 + hostname: schema-registry-auth + ports: + - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC + SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user + SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar + SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth + volumes: + - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets + depends_on: + - kafka_zookeeper + - kafka1 + restart: always security_opt: - label:disable diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ed2f010a632..15f9d1f47bf 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -76,6 +76,7 @@ The supported formats are: | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | | [Native](#native) | ✔ | ✔ | | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | @@ -472,6 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. +- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. ## CSVWithNames {#csvwithnames} @@ -1515,6 +1517,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: +## RowBinaryWithDefaults {#rowbinarywithdefaults} + +Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if default value should be used. + +Examples: + +```sql +:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000') + +┌──x─┬─y─┐ +│ 42 │ 1 │ +└────┴───┘ +``` + +For column `x` there is only one byte `01` that indicates that default value should be used and no other data after this byte is provided. +For column `y` data starts with byte `00` that indicates that column has actual value that should be read from the subsequent data `01000000`. + ## RowBinary format settings {#row-binary-format-settings} - [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index c3ddee07d0b..62f931a76b4 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should ``` :::note ALL -`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse. +Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command. ::: ## Background diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 0915c51806a..ee8e0d547b8 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -989,6 +989,28 @@ Result a b ``` +### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} + +Allow to set default value to column when CSV field deserialization failed on bad value + +Default value: `false`. + +**Examples** + +Query + +```bash +./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" +echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +./clickhouse local -q "select * from test_tbl" +``` + +Result + +```text +a 0 1971-01-01 +``` + ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} @@ -1325,6 +1347,17 @@ Default value: 0. Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. +Format: +``` text +http://[user:password@]machine[:port]" +``` + +Examples: +``` text +http://registry.example.com:8081 +http://admin:secret@registry.example.com:8081 +``` + Default value: `Empty`. ### output_format_avro_codec {#output_format_avro_codec} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 22aeecf4335..580b51a984d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -537,6 +537,8 @@ Possible values: The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`. + - hash [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 64fae0e82f0..69f1816b7df 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -6,9 +6,20 @@ sidebar_label: Arithmetic # Arithmetic Functions -The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit. +Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`. -The result of addition or multiplication of two integers is unsigned unless one of the integers is signed. +Before performing the operation, both operands are casted to the result type. The result type is determined as follows (unless specified +differently in the function documentation below): +- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the + two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`. +- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For + example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`. +- If one of the operands is signed, the result type will also be signed, otherwise it will be signed. For example, `UInt32 * Int32 = Int64`. + +These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk +of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of +64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer +type. Example: @@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘ ``` -Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values. - Overflows are produced the same way as in C++. ## plus @@ -68,7 +77,7 @@ Alias: `a \* b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -84,7 +93,7 @@ Alias: `a / b` (operator) Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer. -The result has the same type as the dividend (the first parameter). +The result has the same width as the dividend (the first parameter). An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. @@ -135,7 +144,7 @@ intDivOrZero(a, b) Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number. +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 19eeda967fe..87d84425029 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -722,7 +722,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. For an alternative to `age`, see function `date\_diff`. @@ -738,6 +738,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) @@ -813,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) @@ -1134,6 +1138,8 @@ Result: Returns the current date and time at the moment of query analysis. The function is a constant expression. +Alias: `current_timestamp`. + **Syntax** ``` sql @@ -1264,6 +1270,8 @@ Result: Accepts zero arguments and returns the current date at one of the moments of query analysis. The same as ‘toDate(now())’. +Aliases: `curdate`, `current_date`. + ## yesterday Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis. @@ -1441,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | | %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 4f174a53ad6..9890d257e84 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. + +## firstLine + +Returns the first line from a multi-line string. + +**Syntax** + +```sql +firstLine(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- The first line of the input value or the whole value if there is no line + separators. [String](../data-types/string.md) + +**Example** + +```sql +select firstLine('foo\nbar\nbaz'); +``` + +Result: + +```result +┌─firstLine('foo\nbar\nbaz')─┐ +│ foo │ +└────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 10b15638152..11026340a0f 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 336b93db9d5..1c399d2072b 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ The optional keyword `FULL` causes the output to include the collation, comment and privilege columns. -`SHOW COLUMNS` produces a result table with the following structure: +The statement produces a result table with the following structure: - field - The name of the column (String) - type - The column data type (String) - null - If the column data type is Nullable (UInt8) @@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 Displays a list of primary and data skipping indexes of a table. +This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for +primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices) +provide equivalent information but in a fashion more native to ClickHouse. + ```sql SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} [{FROM | IN} ] [WHERE ] [INTO OUTFILE ] [FORMAT ] ``` @@ -281,22 +285,22 @@ equivalent. If no database is specified, the query assumes the current database The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. -`SHOW INDEX` produces a result table with the following structure: -- table - The name of the table (String) -- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) -- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) -- seq_in_index - Currently unused -- column_name - Currently unused -- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) -- cardinality - Currently unused -- sub_part - Currently unused -- packed - Currently unused +The statement produces a result table with the following structure: +- table - The name of the table. (String) +- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) +- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String) +- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8) +- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String) +- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String)) +- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) +- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String)) +- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String)) - null - Currently unused -- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) -- comment - Currently unused -- index_comment - Currently unused -- visible - If the index is visible to the optimizer, always `YES` (String) -- expression - The index expression (String) +- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) +- comment - Additional information about the index, currently always `''` (empty string). (String) +- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String) +- visible - If the index is visible to the optimizer, always `YES`. (String) +- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String) **Examples** @@ -310,11 +314,12 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │ +│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index c78ffc1d61c..00917414e0c 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. - `{N..M}` — Substitutes any number in range from N to M including both borders. - `**` - Fetches all files inside the folder recursively. diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index 9a1f9c9347d..6b4e612b13f 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ $ rm -rf build $ mkdir build $ cd build - $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake --build . --config RelWithDebInfo $ cd .. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 779728ca0fe..4db8a1ec6f8 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -625,7 +625,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** @@ -639,6 +639,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) @@ -712,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index b872200f99b..276dfc2ef20 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00   Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. + +## firstLine + +Возвращает первую строку в многострочном тексте. + +**Синтаксис** + +```sql +firstLine(val) +``` + +**Аргументы** + +- `val` - текст для обработки. [String](../data-types/string.md) + +**Returned value** + +- Первая строка текста или весь текст, если переносы строк отсутствуют. + +Тип: [String](../data-types/string.md) + +**Пример** + +Запрос: + +```sql +select firstLine('foo\nbar\nbaz'); +``` + +Результат: + +```result +┌─firstLine('foo\nbar\nbaz')─┐ +│ foo │ +└────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index d3846aac289..1a60dc0716c 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` `LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md). diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 0983c51d954..83ef115aacd 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 53dadc23c6d..e4b70322477 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 可能的值: + - `microsecond` + - `millisecond` - `second` - `minute` - `hour` diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 8ce2d20a10c..bce0994ecd2 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。 ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` 实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。 diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 7742d8b278a..42dc5bd380d 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE +AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index f791c39bad1..e1a33231592 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1173,12 +1173,12 @@ void Client::processOptions(const OptionsDescription & options_description, { String traceparent = options["opentelemetry-traceparent"].as(); String error; - if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error)) + if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error); } if (options.count("opentelemetry-tracestate")) - global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as(); + global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as(); } @@ -1238,10 +1238,9 @@ void Client::processConfig() global_context->getSettingsRef().max_insert_block_size); } - ClientInfo & client_info = global_context->getClientInfo(); - client_info.setInitialQuery(); - client_info.quota_key = config().getString("quota_key", ""); - client_info.query_kind = query_kind; + global_context->setQueryKindInitial(); + global_context->setQuotaClientKey(config().getString("quota_key", "")); + global_context->setQueryKind(query_kind); } diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 54c39f5709f..940e6848597 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -34,6 +34,8 @@ add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 96924e3c8d9..3c2a8ae3152 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -737,9 +737,8 @@ void LocalServer::processConfig() for (const auto & [key, value] : prompt_substitutions) boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value); - ClientInfo & client_info = global_context->getClientInfo(); - client_info.setInitialQuery(); - client_info.query_kind = query_kind; + global_context->setQueryKindInitial(); + global_context->setQueryKind(query_kind); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c7a7ba71e83..33fdcc9c1a8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -887,6 +887,7 @@ try #endif global_context->setRemoteHostFilter(config()); + global_context->setHTTPHeaderFilter(config()); std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); fs::path path = path_str; @@ -1200,6 +1201,7 @@ try } global_context->setRemoteHostFilter(*config); + global_context->setHTTPHeaderFilter(*config); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); @@ -1599,13 +1601,7 @@ try global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); /// Build loggers before tables startup to make log messages from tables /// attach available in system.text_log - { - String level_str = config().getString("text_log.level", ""); - int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str); - setTextLog(global_context->getTextLog(), level); - - buildLoggers(config(), logger()); - } + buildLoggers(config(), logger()); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper); attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA)); @@ -1641,17 +1637,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). - UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif diff --git a/programs/server/config.xml b/programs/server/config.xml index 65d325ad6c4..2a7dc1e576a 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -866,6 +866,14 @@ --> + + + don't replace it - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); } } @@ -263,14 +263,14 @@ void QueryCache::Writer::finalizeWrite() if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string); return; } if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key)) { /// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); return; } @@ -353,7 +353,7 @@ void QueryCache::Writer::finalizeWrite() if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string); return; } @@ -388,7 +388,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry.has_value()) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string); return; } @@ -397,13 +397,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry_key.is_shared && entry_key.user_name != key.user_name) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string); return; } if (IsStale()(entry_key)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string); return; } @@ -441,7 +441,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes); } - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string); } bool QueryCache::Reader::hasCacheEntryForKey() const diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 6ef7cc60918..c24b09c8e46 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -30,7 +30,7 @@ public: /// ---------------------------------------------------- /// The actual key (data which gets hashed): - /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select) + /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select). const ASTPtr ast; /// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the @@ -58,6 +58,11 @@ public: /// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case) const bool is_compressed; + /// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the + /// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in + /// SYSTEM.QUERY_CACHE. + const String query_string; + /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, Block header_, @@ -69,7 +74,6 @@ public: Key(ASTPtr ast_, const String & user_name_); bool operator==(const Key & other) const; - String queryStringFromAst() const; }; struct Entry diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index e2f1dfe8ba7..3dea52faf46 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -171,7 +171,7 @@ void executeQuery( SelectStreamFactory::Shards remote_shards; auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); - new_context->getClientInfo().distributed_depth += 1; + new_context->increaseDistributedDepth(); size_t shards = query_info.getCluster()->getShardCount(); for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 10a46fda24e..cc1277e08b9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -327,9 +328,10 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr fetch_executor; OrdinaryBackgroundExecutorPtr common_executor; - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml - std::optional trace_collector; /// Thread collecting traces from threads executing queries + std::optional trace_collector; /// Thread collecting traces from threads executing queries /// Clusters for distributed tables /// Initialized on demand (on distributed storages initialization) since Settings should be initialized @@ -1057,25 +1059,54 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_) +void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool set_current_roles_, bool set_current_database_) { + /// Prepare lists of user's profiles, constraints, settings, roles. + + std::shared_ptr user; + std::shared_ptr temp_access; + if (set_current_profiles_ || set_current_roles_ || set_current_database_) + { + std::optional params; + { + auto lock = getLock(); + params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info}); + } + /// `temp_access` is used here only to extract information about the user, not to actually check access. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + temp_access = getAccessControl().getContextAccess(*params); + user = temp_access->getUser(); + } + + std::shared_ptr profiles; + if (set_current_profiles_) + profiles = temp_access->getDefaultProfileInfo(); + + std::optional> roles; + if (set_current_roles_) + roles = user->granted_roles.findGranted(user->default_roles); + + String database; + if (set_current_database_) + database = user->default_database; + + /// Apply user's profiles, constraints, settings, roles. auto lock = getLock(); - user_id = user_id_; + setUserID(user_id_); - access = getAccessControl().getContextAccess( - user_id_, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info); + if (profiles) + { + /// A profile can specify a value and a readonly constraint for same setting at the same time, + /// so we shouldn't check constraints here. + setCurrentProfiles(*profiles, /* check_constraints= */ false); + } - auto user = access->getUser(); + if (roles) + setCurrentRoles(*roles); - current_roles = std::make_shared>(user->granted_roles.findGranted(user->default_roles)); - - auto default_profile_info = access->getDefaultProfileInfo(); - settings_constraints_and_current_profiles = default_profile_info->getConstraintsAndProfileIDs(); - applySettingsChanges(default_profile_info->settings); - - if (!user->default_database.empty()) - setCurrentDatabase(user->default_database); + if (!database.empty()) + setCurrentDatabase(database); } std::shared_ptr Context::getUser() const @@ -1088,6 +1119,13 @@ String Context::getUserName() const return getAccess()->getUserName(); } +void Context::setUserID(const UUID & user_id_) +{ + auto lock = getLock(); + user_id = user_id_; + need_recalculate_access = true; +} + std::optional Context::getUserID() const { auto lock = getLock(); @@ -1105,10 +1143,11 @@ void Context::setQuotaKey(String quota_key_) void Context::setCurrentRoles(const std::vector & current_roles_) { auto lock = getLock(); - if (current_roles ? (*current_roles == current_roles_) : current_roles_.empty()) - return; - current_roles = std::make_shared>(current_roles_); - calculateAccessRights(); + if (current_roles_.empty()) + current_roles = nullptr; + else + current_roles = std::make_shared>(current_roles_); + need_recalculate_access = true; } void Context::setCurrentRolesDefault() @@ -1133,20 +1172,6 @@ std::shared_ptr Context::getRolesInfo() const } -void Context::calculateAccessRights() -{ - auto lock = getLock(); - if (user_id) - access = getAccessControl().getContextAccess( - *user_id, - current_roles ? *current_roles : std::vector{}, - /* use_default_roles = */ false, - settings, - current_database, - client_info); -} - - template void Context::checkAccessImpl(const Args &... args) const { @@ -1166,32 +1191,55 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } - std::shared_ptr Context::getAccess() const { - auto lock = getLock(); - return access ? access : ContextAccess::getFullAccess(); + /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired. + auto get_params = [this]() + { + /// If setUserID() was never called then this must be the global context with the full access. + bool full_access = !user_id; + + return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info}; + }; + + /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights. + std::optional params; + + { + auto lock = getLock(); + if (access && !need_recalculate_access) + return access; /// No need to recalculate access rights. + + params.emplace(get_params()); + + if (access && (access->getParams() == *params)) + { + need_recalculate_access = false; + return access; /// No need to recalculate access rights. + } + } + + /// Calculate new access rights according to the collected parameters. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + auto res = getAccessControl().getContextAccess(*params); + + { + /// If the parameters of access rights were not changed while we were calculated them + /// then we store the new access rights in the Context to allow reusing it later. + auto lock = getLock(); + if (get_params() == *params) + { + access = res; + need_recalculate_access = false; + } + } + + return res; } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const { - auto lock = getLock(); - RowPolicyFilterPtr row_filter_of_initial_user; - if (row_policies_of_initial_user) - row_filter_of_initial_user = row_policies_of_initial_user->getFilter(database, table_name, filter_type); - return getAccess()->getRowPolicyFilter(database, table_name, filter_type, row_filter_of_initial_user); -} - -void Context::enableRowPoliciesOfInitialUser() -{ - auto lock = getLock(); - row_policies_of_initial_user = nullptr; - if (client_info.initial_user == client_info.current_user) - return; - auto initial_user_id = getAccessControl().find(client_info.initial_user); - if (!initial_user_id) - return; - row_policies_of_initial_user = getAccessControl().tryGetDefaultRowPolicies(*initial_user_id); + return getAccess()->getRowPolicyFilter(database, table_name, filter_type); } @@ -1207,13 +1255,12 @@ std::optional Context::getQuotaUsage() const } -void Context::setCurrentProfile(const String & profile_name) +void Context::setCurrentProfile(const String & profile_name, bool check_constraints) { - auto lock = getLock(); try { UUID profile_id = getAccessControl().getID(profile_name); - setCurrentProfile(profile_id); + setCurrentProfile(profile_id, check_constraints); } catch (Exception & e) { @@ -1222,15 +1269,20 @@ void Context::setCurrentProfile(const String & profile_name) } } -void Context::setCurrentProfile(const UUID & profile_id) +void Context::setCurrentProfile(const UUID & profile_id, bool check_constraints) { - auto lock = getLock(); auto profile_info = getAccessControl().getSettingsProfileInfo(profile_id); - checkSettingsConstraints(profile_info->settings); - applySettingsChanges(profile_info->settings); - settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); + setCurrentProfiles(*profile_info, check_constraints); } +void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints) +{ + auto lock = getLock(); + if (check_constraints) + checkSettingsConstraints(profiles_info.settings); + applySettingsChanges(profiles_info.settings); + settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); +} std::vector Context::getCurrentProfiles() const { @@ -1409,15 +1461,24 @@ void Context::addQueryAccessInfo( void Context::addQueryAccessInfo(const Names & partition_names) { if (isGlobalContext()) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - } std::lock_guard lock(query_access_info.mutex); for (const auto & partition_name : partition_names) - { query_access_info.partitions.emplace(partition_name); - } +} + +void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) +{ + if (!qualified_projection_name) + return; + + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + + std::lock_guard lock(query_access_info.mutex); + query_access_info.projections.emplace(fmt::format( + "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name))); } void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const @@ -1704,27 +1765,8 @@ Settings Context::getSettings() const void Context::setSettings(const Settings & settings_) { auto lock = getLock(); - const auto old_readonly = settings.readonly; - const auto old_allow_ddl = settings.allow_ddl; - const auto old_allow_introspection_functions = settings.allow_introspection_functions; - const auto old_display_secrets = settings.format_display_secrets_in_show_and_select; - settings = settings_; - - if ((settings.readonly != old_readonly) - || (settings.allow_ddl != old_allow_ddl) - || (settings.allow_introspection_functions != old_allow_introspection_functions) - || (settings.format_display_secrets_in_show_and_select != old_display_secrets)) - calculateAccessRights(); -} - -void Context::recalculateAccessRightsIfNeeded(std::string_view name) -{ - if (name == "readonly" - || name == "allow_ddl" - || name == "allow_introspection_functions" - || name == "format_display_secrets_in_show_and_select") - calculateAccessRights(); + need_recalculate_access = true; } void Context::setSetting(std::string_view name, const String & value) @@ -1736,7 +1778,8 @@ void Context::setSetting(std::string_view name, const String & value) return; } settings.set(name, value); - recalculateAccessRightsIfNeeded(name); + if (ContextAccessParams::dependsOnSettingName(name)) + need_recalculate_access = true; } void Context::setSetting(std::string_view name, const Field & value) @@ -1748,7 +1791,8 @@ void Context::setSetting(std::string_view name, const Field & value) return; } settings.set(name, value); - recalculateAccessRightsIfNeeded(name); + if (ContextAccessParams::dependsOnSettingName(name)) + need_recalculate_access = true; } void Context::applySettingChange(const SettingChange & change) @@ -1857,7 +1901,7 @@ void Context::setCurrentDatabase(const String & name) DatabaseCatalog::instance().assertDatabaseExists(name); auto lock = getLock(); current_database = name; - calculateAccessRights(); + need_recalculate_access = true; } void Context::setCurrentQueryId(const String & query_id) @@ -2963,6 +3007,16 @@ const RemoteHostFilter & Context::getRemoteHostFilter() const return shared->remote_host_filter; } +void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config) +{ + shared->http_header_filter.setValuesFromConfig(config); +} + +const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const +{ + return shared->http_header_filter; +} + UInt16 Context::getTCPPort() const { auto lock = getLock(); @@ -3821,6 +3875,129 @@ void Context::resetInputCallbacks() } +void Context::setClientInfo(const ClientInfo & client_info_) +{ + client_info = client_info_; + need_recalculate_access = true; +} + +void Context::setClientName(const String & client_name) +{ + client_info.client_name = client_name; +} + +void Context::setClientInterface(ClientInfo::Interface interface) +{ + client_info.interface = interface; + need_recalculate_access = true; +} + +void Context::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + client_info.client_version_major = client_version_major; + client_info.client_version_minor = client_version_minor; + client_info.client_version_patch = client_version_patch; + client_info.client_tcp_protocol_version = client_tcp_protocol_version; +} + +void Context::setClientConnectionId(uint32_t connection_id_) +{ + client_info.connection_id = connection_id_; +} + +void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +{ + client_info.http_method = http_method; + client_info.http_user_agent = http_user_agent; + client_info.http_referer = http_referer; + need_recalculate_access = true; +} + +void Context::setForwardedFor(const String & forwarded_for) +{ + client_info.forwarded_for = forwarded_for; + need_recalculate_access = true; +} + +void Context::setQueryKind(ClientInfo::QueryKind query_kind) +{ + client_info.query_kind = query_kind; +} + +void Context::setQueryKindInitial() +{ + /// TODO: Try to combine this function with setQueryKind(). + client_info.setInitialQuery(); +} + +void Context::setQueryKindReplicatedDatabaseInternal() +{ + /// TODO: Try to combine this function with setQueryKind(). + client_info.is_replicated_database_internal = true; +} + +void Context::setCurrentUserName(const String & current_user_name) +{ + /// TODO: Try to combine this function with setUser(). + client_info.current_user = current_user_name; + need_recalculate_access = true; +} + +void Context::setCurrentAddress(const Poco::Net::SocketAddress & current_address) +{ + client_info.current_address = current_address; + need_recalculate_access = true; +} + +void Context::setInitialUserName(const String & initial_user_name) +{ + client_info.initial_user = initial_user_name; + need_recalculate_access = true; +} + +void Context::setInitialAddress(const Poco::Net::SocketAddress & initial_address) +{ + client_info.initial_address = initial_address; +} + +void Context::setInitialQueryId(const String & initial_query_id) +{ + client_info.initial_query_id = initial_query_id; +} + +void Context::setInitialQueryStartTime(std::chrono::time_point initial_query_start_time) +{ + client_info.initial_query_start_time = timeInSeconds(initial_query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(initial_query_start_time); +} + +void Context::setQuotaClientKey(const String & quota_key_) +{ + client_info.quota_key = quota_key_; + need_recalculate_access = true; +} + +void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + client_info.connection_client_version_major = client_version_major; + client_info.connection_client_version_minor = client_version_minor; + client_info.connection_client_version_patch = client_version_patch; + client_info.connection_tcp_protocol_version = client_tcp_protocol_version; +} + +void Context::setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica) +{ + client_info.collaborate_with_initiator = collaborate_with_initiator; + client_info.count_participating_replicas = all_replicas_count; + client_info.number_of_current_replica = number_of_current_replica; +} + +void Context::increaseDistributedDepth() +{ + ++client_info.distributed_depth; +} + + StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where) const { if (storage_id.uuid != UUIDHelpers::Nil) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 6cbb0e58911..fa210f04451 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -50,8 +51,8 @@ struct ContextSharedPart; class ContextAccess; struct User; using UserPtr = std::shared_ptr; +struct SettingsProfilesInfo; struct EnabledRolesInfo; -class EnabledRowPolicies; struct RowPolicyFilter; using RowPolicyFilterPtr = std::shared_ptr; class EnabledQuota; @@ -248,8 +249,8 @@ private: std::optional user_id; std::shared_ptr> current_roles; std::shared_ptr settings_constraints_and_current_profiles; - std::shared_ptr access; - std::shared_ptr row_policies_of_initial_user; + mutable std::shared_ptr access; + mutable bool need_recalculate_access = true; String current_database; Settings settings; /// Setting for query execution. @@ -529,12 +530,14 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! - void setUser(const UUID & user_id_); - + void setUser(const UUID & user_id_, bool set_current_profiles_ = true, bool set_current_roles_ = true, bool set_current_database_ = true); UserPtr getUser() const; - String getUserName() const; + + void setUserID(const UUID & user_id_); std::optional getUserID() const; + String getUserName() const; + void setQuotaKey(String quota_key_); void setCurrentRoles(const std::vector & current_roles_); @@ -543,8 +546,9 @@ public: boost::container::flat_set getEnabledRoles() const; std::shared_ptr getRolesInfo() const; - void setCurrentProfile(const String & profile_name); - void setCurrentProfile(const UUID & profile_id); + void setCurrentProfile(const String & profile_name, bool check_constraints = true); + void setCurrentProfile(const UUID & profile_id, bool check_constraints = true); + void setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints = true); std::vector getCurrentProfiles() const; std::vector getEnabledProfiles() const; @@ -567,13 +571,6 @@ public: RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const; - /// Finds and sets extra row policies to be used based on `client_info.initial_user`, - /// if the initial user exists. - /// TODO: we need a better solution here. It seems we should pass the initial row policy - /// because a shard is allowed to not have the initial user or it might be another user - /// with the same name. - void enableRowPoliciesOfInitialUser(); - std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; @@ -597,9 +594,33 @@ public: InputBlocksReader getInputBlocksReaderCallback() const; void resetInputCallbacks(); - ClientInfo & getClientInfo() { return client_info; } + /// Returns information about the client executing a query. const ClientInfo & getClientInfo() const { return client_info; } + /// Modify stored in the context information about the client executing a query. + void setClientInfo(const ClientInfo & client_info_); + void setClientName(const String & client_name); + void setClientInterface(ClientInfo::Interface interface); + void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setClientConnectionId(uint32_t connection_id); + void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setForwardedFor(const String & forwarded_for); + void setQueryKind(ClientInfo::QueryKind query_kind); + void setQueryKindInitial(); + void setQueryKindReplicatedDatabaseInternal(); + void setCurrentUserName(const String & current_user_name); + void setCurrentAddress(const Poco::Net::SocketAddress & current_address); + void setInitialUserName(const String & initial_user_name); + void setInitialAddress(const Poco::Net::SocketAddress & initial_address); + void setInitialQueryId(const String & initial_query_id); + void setInitialQueryStartTime(std::chrono::time_point initial_query_start_time); + void setQuotaClientKey(const String & quota_key); + void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica); + void increaseDistributedDepth(); + const OpenTelemetry::TracingContext & getClientTraceContext() const { return client_info.client_trace_context; } + OpenTelemetry::TracingContext & getClientTraceContext() { return client_info.client_trace_context; } + enum StorageNamespace { ResolveGlobal = 1u, /// Database name must be specified @@ -637,6 +658,14 @@ public: const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); + struct QualifiedProjectionName + { + StorageID storage_id = StorageID::createEmpty(); + String projection_name; + explicit operator bool() const { return !projection_name.empty(); } + }; + void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); + /// Supported factories for records in query_log enum class QueryLogFactories @@ -766,6 +795,10 @@ public: void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); const RemoteHostFilter & getRemoteHostFilter() const; + /// Storage of forbidden HTTP headers from config.xml + void setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config); + const HTTPHeaderFilter & getHTTPHeaderFilter() const; + /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; @@ -1149,10 +1182,6 @@ private: void initGlobal(); - /// Compute and set actual user settings, client_info.current_user should be set - void calculateAccessRights(); - void recalculateAccessRightsIfNeeded(std::string_view setting_name); - template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index b24856a6146..4e684f5899f 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -199,7 +199,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z auto query_context = Context::createCopy(from_context); query_context->makeQueryContext(); query_context->setCurrentQueryId(""); // generate random query_id - query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); if (entry.settings) query_context->applySettingsChanges(*entry.settings); return query_context; @@ -439,8 +439,8 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context) ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) { auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper); - query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - query_context->getClientInfo().is_replicated_database_internal = true; + query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setQueryKindReplicatedDatabaseInternal(); query_context->setCurrentDatabase(database->getDatabaseName()); auto txn = std::make_shared(zookeeper, database->zookeeper_path, is_initial_query, entry_path); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 81c78000ac3..193bb5b6ab0 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -476,7 +476,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep query_context->setSetting("implicit_transaction", Field{0}); } - query_context->getClientInfo().initial_query_id = task.entry.initial_query_id; + query_context->setInitialQueryId(task.entry.initial_query_id); if (!task.is_initial_query) query_scope.emplace(query_context); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 23a67f4bc2f..0e2e30eefee 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -697,6 +697,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(&Poco::Logger::get("DatabaseCatalog")) + , first_async_drop_in_queue(tables_marked_dropped.end()) { } @@ -959,9 +960,17 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr std::lock_guard lock(tables_marked_dropped_mutex); if (ignore_delay) - tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time}); + { + /// Insert it before first_async_drop_in_queue, so sync drop queries will have priority over async ones, + /// but the queue will remain fair for multiple sync drop queries. + tables_marked_dropped.emplace(first_async_drop_in_queue, TableMarkedAsDropped{table_id, table, dropped_metadata_path, drop_time}); + } else + { tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec}); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } tables_marked_dropped_ids.insert(table_id.uuid); CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1); @@ -1012,6 +1021,8 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// This maybe throw exception. renameNoReplace(latest_metadata_dropped_path, table_metadata_path); + if (first_async_drop_in_queue == it_dropped_table) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it_dropped_table); [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(dropped_table.table_id.uuid); assert(removed); @@ -1074,6 +1085,8 @@ void DatabaseCatalog::dropTableDataTask() table = std::move(*it); LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); + if (first_async_drop_in_queue == it) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it); /// Schedule the task as soon as possible, while there are suitable tables to drop. schedule_after_ms = 0; @@ -1110,6 +1123,8 @@ void DatabaseCatalog::dropTableDataTask() table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; std::lock_guard lock(tables_marked_dropped_mutex); tables_marked_dropped.emplace_back(std::move(table)); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; /// If list of dropped tables was empty, schedule a task to retry deletion. if (tables_marked_dropped.size() == 1) { diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index d502505027f..805d7786569 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -323,6 +323,7 @@ private: mutable std::mutex ddl_guards_mutex; TablesMarkedAsDropped tables_marked_dropped TSA_GUARDED_BY(tables_marked_dropped_mutex); + TablesMarkedAsDropped::iterator first_async_drop_in_queue TSA_GUARDED_BY(tables_marked_dropped_mutex); std::unordered_set tables_marked_dropped_ids TSA_GUARDED_BY(tables_marked_dropped_mutex); mutable std::mutex tables_marked_dropped_mutex; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index edf604bc0b4..5d72cf20740 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -302,7 +302,8 @@ void GraceHashJoin::initBuckets() bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) { bool is_asof = (table_join->strictness() == JoinStrictness::Asof); - return !is_asof && isInnerOrLeft(table_join->kind()) && table_join->oneDisjunct(); + auto kind = table_join->kind(); + return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct(); } GraceHashJoin::~GraceHashJoin() = default; @@ -322,7 +323,6 @@ bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) con /// One row can't be split, avoid loop if (total_rows < 2) return false; - bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes); if (has_overflow) @@ -494,17 +494,30 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const return hash_join_is_empty; } -IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const +/// Each bucket are handled by the following steps +/// 1. build hash_join by the right side blocks. +/// 2. join left side with the hash_join, +/// 3. read right non-joined blocks from hash_join. +/// buckets are handled one by one, each hash_join will not be release before the right non-joined blocks are emitted. +/// +/// There is a finished counter in JoiningTransform/DelayedJoinedBlocksWorkerTransform, +/// only one processor could take the non-joined blocks from right stream, and ensure all rows from +/// left stream have been emitted before this. +IBlocksStreamPtr +GraceHashJoin::getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size_) const { - /// We do no support returning non joined blocks here. - /// TODO: They _should_ be reported by getDelayedBlocks instead - return nullptr; + return hash_join->getNonJoinedBlocks(left_sample_block_, result_sample_block_, max_block_size_); } class GraceHashJoin::DelayedBlocks : public IBlocksStream { public: - explicit DelayedBlocks(size_t current_bucket_, Buckets buckets_, InMemoryJoinPtr hash_join_, const Names & left_key_names_, const Names & right_key_names_) + explicit DelayedBlocks( + size_t current_bucket_, + Buckets buckets_, + InMemoryJoinPtr hash_join_, + const Names & left_key_names_, + const Names & right_key_names_) : current_bucket(current_bucket_) , buckets(std::move(buckets_)) , hash_join(std::move(hash_join_)) @@ -522,12 +535,15 @@ public: do { + // One DelayedBlocks is shared among multiple DelayedJoinedBlocksWorkerTransform. + // There is a lock inside left_reader.read() . block = left_reader.read(); if (!block) { return {}; } + // block comes from left_reader, need to join with right table to get the result. Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets); block = std::move(blocks[current_idx]); diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index bce04ee6b04..ce519892b0e 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -13,7 +13,6 @@ namespace DB { - class TableJoin; class HashJoin; @@ -79,7 +78,7 @@ public: bool supportTotals() const override { return false; } IBlocksStreamPtr - getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override; + getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size) const override; /// Open iterator over joined blocks. /// Must be called after all @joinBlock calls. diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 0beb4492aef..616cf80a446 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -451,11 +451,11 @@ void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr auto drop_context = Context::createCopy(global_context); if (ignore_sync_setting) drop_context->setSetting("database_atomic_wait_for_drop_and_detach_synchronously", false); - drop_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + drop_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); if (auto txn = current_context->getZooKeeperMetadataTransaction()) { /// For Replicated database - drop_context->getClientInfo().is_replicated_database_internal = true; + drop_context->setQueryKindReplicatedDatabaseInternal(); drop_context->setQueryContext(std::const_pointer_cast(current_context)); drop_context->initZooKeeperMetadataTransaction(txn, true); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 32812151b59..d07a6521544 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -3183,7 +3183,7 @@ void InterpreterSelectQuery::initSettings() if (query.settings()) InterpreterSetQuery(query.settings(), context).executeForCurrentContext(options.ignore_setting_constraints); - auto & client_info = context->getClientInfo(); + const auto & client_info = context->getClientInfo(); auto min_major = DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD; auto min_minor = DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD; diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 51311c82eeb..149420006fb 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -40,20 +40,20 @@ SELECT * FROM ( (SELECT name AS table, - 0 AS non_unique, + 1 AS non_unique, 'PRIMARY' AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + row_number() over (order by column_name) AS seq_in_index, + arrayJoin(splitByString(', ', primary_key)) AS column_name, 'A' AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, - 'primary' AS index_type, - NULL AS comment, - NULL AS index_comment, + 'PRIMARY' AS index_type, + '' AS comment, + '' AS index_comment, 'YES' AS visible, - primary_key AS expression + '' AS expression FROM system.tables WHERE database = '{0}' @@ -61,18 +61,18 @@ FROM ( UNION ALL ( SELECT table AS table, - 0 AS non_unique, + 1 AS non_unique, name AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + 1 AS seq_in_index, + '' AS column_name, NULL AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, - type AS index_type, - NULL AS comment, - NULL AS index_comment, + upper(type) AS index_type, + '' AS comment, + '' AS index_comment, 'YES' AS visible, expr AS expression FROM system.data_skipping_indices @@ -80,12 +80,27 @@ FROM ( database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression;)", database, table, where_expression); +ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some /// SQL tests can take advantage of this. + /// Note about compatibility of fields 'column_name', 'seq_in_index' and 'expression' with MySQL: + /// MySQL has non-functional and functional indexes. + /// - Non-functional indexes only reference columns, e.g. 'col1, col2'. In this case, `SHOW INDEX` produces as many result rows as there + /// are indexed columns. 'column_name' and 'seq_in_index' (an ascending integer 1, 2, ...) are filled, 'expression' is empty. + /// - Functional indexes can reference arbitrary expressions, e.g. 'col1 + 1, concat(col2, col3)'. 'SHOW INDEX' produces a single row + /// with `column_name` and `seq_in_index` empty and `expression` filled with the entire index expression. Only non-primary-key indexes + /// can be functional indexes. + /// Above SELECT tries to emulate that. Caveats: + /// 1. The primary key index sub-SELECT assumes the primary key expression is non-functional. Non-functional primary key indexes in + /// ClickHouse are possible but quiete obscure. In MySQL they are not possible at all. + /// 2. Related to 1.: Poor man's tuple parsing with splitByString() in the PK sub-SELECT messes up for functional primary key index + /// expressions where the comma is not only used as separator between tuple components, e.g. in 'col1 + 1, concat(col2, col3)'. + /// 3. The data skipping index sub-SELECT assumes the index expression is functional. 3rd party tools that expect MySQL semantics from + /// SHOW INDEX will probably not care as MySQL has no skipping indexes and they only use the result to figure out the primary key. + return rewritten_query; } diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index ee5c288afbb..29add31fd5d 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -337,6 +337,11 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name); return nullptr; } + if (dictionary->getSpecialKeyType() == DictionarySpecialKeyType::Range) + { + LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name); + return nullptr; + } auto dictionary_kv = std::dynamic_pointer_cast(dictionary); table_join->setStorageJoin(dictionary_kv); diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index 6606e64f689..e64ff34b11f 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -118,7 +118,10 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e if (table_element->table_join && isLeft(table_element->table_join->as()->kind)) continue; /// Skip right table optimization - if (table_element->table_join && isFull(table_element->table_join->as()->kind)) + if (table_element->table_join && ( + isFull(table_element->table_join->as()->kind) + || table_element->table_join->as()->strictness == JoinStrictness::Asof + || table_element->table_join->as()->strictness == JoinStrictness::Anti)) break; /// Skip left and right table optimization is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos], diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 1503e396298..c299572ef41 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 64f7b4fc934..97b056cfc32 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -299,7 +299,10 @@ Session::~Session() if (notified_session_log_about_login) { if (auto session_log = getSessionLog()) + { + /// TODO: We have to ensure that the same info is added to the session log on a LoginSuccess event and on the corresponding Logout event. session_log->addLogOut(auth_id, user, getClientInfo()); + } } } @@ -368,17 +371,117 @@ void Session::onAuthenticationFailure(const std::optional & user_name, c } } -ClientInfo & Session::getClientInfo() -{ - /// FIXME it may produce different info for LoginSuccess and the corresponding Logout entries in the session log - return session_context ? session_context->getClientInfo() : *prepared_client_info; -} - const ClientInfo & Session::getClientInfo() const { return session_context ? session_context->getClientInfo() : *prepared_client_info; } +void Session::setClientInfo(const ClientInfo & client_info) +{ + if (session_context) + session_context->setClientInfo(client_info); + else + prepared_client_info = client_info; +} + +void Session::setClientName(const String & client_name) +{ + if (session_context) + session_context->setClientName(client_name); + else + prepared_client_info->client_name = client_name; +} + +void Session::setClientInterface(ClientInfo::Interface interface) +{ + if (session_context) + session_context->setClientInterface(interface); + else + prepared_client_info->interface = interface; +} + +void Session::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + if (session_context) + { + session_context->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + } + else + { + prepared_client_info->client_version_major = client_version_major; + prepared_client_info->client_version_minor = client_version_minor; + prepared_client_info->client_version_patch = client_version_patch; + prepared_client_info->client_tcp_protocol_version = client_tcp_protocol_version; + } +} + +void Session::setClientConnectionId(uint32_t connection_id) +{ + if (session_context) + session_context->setClientConnectionId(connection_id); + else + prepared_client_info->connection_id = connection_id; +} + +void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +{ + if (session_context) + { + session_context->setHttpClientInfo(http_method, http_user_agent, http_referer); + } + else + { + prepared_client_info->http_method = http_method; + prepared_client_info->http_user_agent = http_user_agent; + prepared_client_info->http_referer = http_referer; + } +} + +void Session::setForwardedFor(const String & forwarded_for) +{ + if (session_context) + session_context->setForwardedFor(forwarded_for); + else + prepared_client_info->forwarded_for = forwarded_for; +} + +void Session::setQuotaClientKey(const String & quota_key) +{ + if (session_context) + session_context->setQuotaClientKey(quota_key); + else + prepared_client_info->quota_key = quota_key; +} + +void Session::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + if (session_context) + { + session_context->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + } + else + { + prepared_client_info->connection_client_version_major = client_version_major; + prepared_client_info->connection_client_version_minor = client_version_minor; + prepared_client_info->connection_client_version_patch = client_version_patch; + prepared_client_info->connection_tcp_protocol_version = client_tcp_protocol_version; + } +} + +const OpenTelemetry::TracingContext & Session::getClientTraceContext() const +{ + if (session_context) + return session_context->getClientTraceContext(); + return prepared_client_info->client_trace_context; +} + +OpenTelemetry::TracingContext & Session::getClientTraceContext() +{ + if (session_context) + return session_context->getClientTraceContext(); + return prepared_client_info->client_trace_context; +} + ContextMutablePtr Session::makeSessionContext() { if (session_context) @@ -396,8 +499,7 @@ ContextMutablePtr Session::makeSessionContext() new_session_context->makeSessionContext(); /// Copy prepared client info to the new session context. - auto & res_client_info = new_session_context->getClientInfo(); - res_client_info = std::move(prepared_client_info).value(); + new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. @@ -436,8 +538,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: /// Copy prepared client info to the session context, no matter it's been just created or not. /// If we continue using a previously created session context found by session ID /// it's necessary to replace the client info in it anyway, because it contains actual connection information (client address, etc.) - auto & res_client_info = new_session_context->getClientInfo(); - res_client_info = std::move(prepared_client_info).value(); + new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. @@ -492,32 +593,28 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t } /// Copy the specified client info to the new query context. - auto & res_client_info = query_context->getClientInfo(); if (client_info_to_move) - res_client_info = std::move(*client_info_to_move); + query_context->setClientInfo(*client_info_to_move); else if (client_info_to_copy && (client_info_to_copy != &getClientInfo())) - res_client_info = *client_info_to_copy; + query_context->setClientInfo(*client_info_to_copy); /// Copy current user's name and address if it was authenticated after query_client_info was initialized. if (prepared_client_info && !prepared_client_info->current_user.empty()) { - res_client_info.current_user = prepared_client_info->current_user; - res_client_info.current_address = prepared_client_info->current_address; + query_context->setCurrentUserName(prepared_client_info->current_user); + query_context->setCurrentAddress(prepared_client_info->current_address); } /// Set parameters of initial query. - if (res_client_info.query_kind == ClientInfo::QueryKind::NO_QUERY) - res_client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::NO_QUERY) + query_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY); - if (res_client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { - res_client_info.initial_user = res_client_info.current_user; - res_client_info.initial_address = res_client_info.current_address; + query_context->setInitialUserName(query_context->getClientInfo().current_user); + query_context->setInitialAddress(query_context->getClientInfo().current_address); } - /// Sets that row policies of the initial user should be used too. - query_context->enableRowPoliciesOfInitialUser(); - /// Set user information for the new context: current profiles, roles, access rights. if (user_id && !query_context->getAccess()->tryGetUser()) query_context->setUser(*user_id); @@ -566,4 +663,3 @@ void Session::closeSession(const String & session_id) } } - diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index d7c06a60464..36f811ccd24 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -54,10 +54,23 @@ public: /// Writes a row about login failure into session log (if enabled) void onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e); - /// Returns a reference to session ClientInfo. - ClientInfo & getClientInfo(); + /// Returns a reference to the session's ClientInfo. const ClientInfo & getClientInfo() const; + /// Modify the session's ClientInfo. + void setClientInfo(const ClientInfo & client_info); + void setClientName(const String & client_name); + void setClientInterface(ClientInfo::Interface interface); + void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setClientConnectionId(uint32_t connection_id); + void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setForwardedFor(const String & forwarded_for); + void setQuotaClientKey(const String & quota_key); + void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + + const OpenTelemetry::TracingContext & getClientTraceContext() const; + OpenTelemetry::TracingContext & getClientTraceContext(); + /// Makes a session context, can be used one or zero times. /// The function also assigns an user to this context. ContextMutablePtr makeSessionContext(); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3fd0297f5b8..0b89b1dec26 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -332,15 +332,16 @@ SystemLog::SystemLog( const String & database_name_, const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_) - : WithContext(context_) + size_t flush_interval_milliseconds_, + std::shared_ptr> queue_) + : Base(database_name_ + "." + table_name_, flush_interval_milliseconds_, queue_) + , WithContext(context_) + , log(&Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")")) , table_id(database_name_, table_name_) , storage_def(storage_def_) , create_query(serializeAST(*getCreateTableQuery())) - , flush_interval_milliseconds(flush_interval_milliseconds_) { assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); - log = &Poco::Logger::get("SystemLog (" + database_name_ + "." + table_name_ + ")"); } template @@ -353,6 +354,26 @@ void SystemLog::shutdown() table->flushAndShutdown(); } +template +void SystemLog::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + + template void SystemLog::savingThreadFunction() { @@ -370,27 +391,7 @@ void SystemLog::savingThreadFunction() // Should we prepare table even if there are no new messages. bool should_prepare_tables_anyway = false; - { - std::unique_lock lock(mutex); - flush_event.wait_for(lock, - std::chrono::milliseconds(flush_interval_milliseconds), - [&] () - { - return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; - } - ); - - queue_front_index += queue.size(); - to_flush_end = queue_front_index; - // Swap with existing array from previous flush, to save memory - // allocations. - to_flush.resize(0); - queue.swap(to_flush); - - should_prepare_tables_anyway = is_force_prepare_tables; - - exit_this_thread = is_shutdown; - } + to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread); if (to_flush.empty()) { @@ -399,9 +400,7 @@ void SystemLog::savingThreadFunction() prepareTable(); LOG_TRACE(log, "Table created (force)"); - std::lock_guard lock(mutex); - is_force_prepare_tables = false; - flush_event.notify_all(); + queue->confirm(to_flush_end); } } else @@ -473,12 +472,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, tryLogCurrentException(__PRETTY_FUNCTION__); } - { - std::lock_guard lock(mutex); - flushed_up_to = to_flush_end; - is_force_prepare_tables = false; - flush_event.notify_all(); - } + queue->confirm(to_flush_end); LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end); } @@ -618,7 +612,6 @@ ASTPtr SystemLog::getCreateTableQuery() return create; } - #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 84b70c67e2a..5d8bb30150d 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -108,32 +108,34 @@ public: const String & database_name_, const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_); + size_t flush_interval_milliseconds_, + std::shared_ptr> queue_ = nullptr); + + /** Append a record into log. + * Writing to table will be done asynchronously and in case of failure, record could be lost. + */ void shutdown() override; + void stopFlushThread() override; + protected: - using ISystemLog::mutex; + Poco::Logger * log; + using ISystemLog::is_shutdown; - using ISystemLog::flush_event; - using ISystemLog::stopFlushThread; - using Base::log; + using ISystemLog::saving_thread; + using ISystemLog::thread_mutex; using Base::queue; - using Base::queue_front_index; - using Base::is_force_prepare_tables; - using Base::requested_flush_up_to; - using Base::flushed_up_to; - using Base::logged_queue_full_at_index; private: + /* Saving thread data */ const StorageID table_id; const String storage_def; String create_query; String old_create_query; bool is_prepared = false; - const size_t flush_interval_milliseconds; /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index ba3befab59b..5d14a57759f 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -223,10 +223,10 @@ public: { /// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm. /// It's behaviour that was initially supported by clickhouse. - bool is_enbaled_by_default = val == JoinAlgorithm::DEFAULT + bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT; - if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enbaled_by_default) + if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enabled_by_default) return true; return join_algorithm.isSet(val); } diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 45d5a7b2344..108135c78b3 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -84,7 +84,7 @@ TextLog::TextLog(ContextPtr context_, const String & database_name_, const String & table_name_, const String & storage_def_, size_t flush_interval_milliseconds_) : SystemLog(context_, database_name_, table_name_, - storage_def_, flush_interval_milliseconds_) + storage_def_, flush_interval_milliseconds_, getLogQueue(flush_interval_milliseconds_)) { // SystemLog methods may write text logs, so we disable logging for the text // log table to avoid recursion. diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 6efc1c906d4..60ca11632aa 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -40,12 +40,20 @@ struct TextLogElement class TextLog : public SystemLog { public: + using Queue = SystemLogQueue; + TextLog( ContextPtr context_, const String & database_name_, const String & table_name_, const String & storage_def_, size_t flush_interval_milliseconds_); + + static std::shared_ptr getLogQueue(size_t flush_interval_milliseconds) + { + static std::shared_ptr queue = std::make_shared("text_log", flush_interval_milliseconds, true); + return queue; + } }; } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 5acfe500b1d..bac16c05533 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) @@ -518,7 +520,7 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String static String getCleanQueryAst(const ASTPtr q, ContextPtr context) { - String res = serializeAST(*q, true); + String res = serializeAST(*q); if (auto * masker = SensitiveDataMasker::getInstance()) masker->wipeSensitiveData(res); diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 48f4d510af7..2231a58c6a9 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -73,6 +73,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"Create", static_cast(Coordination::OpNum::Create)}, {"Remove", static_cast(Coordination::OpNum::Remove)}, {"Exists", static_cast(Coordination::OpNum::Exists)}, + {"Reconfig", static_cast(Coordination::OpNum::Reconfig)}, {"Get", static_cast(Coordination::OpNum::Get)}, {"Set", static_cast(Coordination::OpNum::Set)}, {"GetACL", static_cast(Coordination::OpNum::GetACL)}, diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 4b76d20f31d..688d3b9967d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -75,6 +75,7 @@ #include #include +#include namespace ProfileEvents { @@ -655,7 +656,7 @@ static std::tuple executeQueryImpl( /// the value passed by the client Stopwatch start_watch{CLOCK_MONOTONIC}; - auto & client_info = context->getClientInfo(); + const auto & client_info = context->getClientInfo(); if (!internal) { @@ -667,8 +668,7 @@ static std::tuple executeQueryImpl( // On the other hand, if it's initialized then take it as the start of the query if (client_info.initial_query_start_time == 0) { - client_info.initial_query_start_time = timeInSeconds(query_start_time); - client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + context->setInitialQueryStartTime(query_start_time); } else { @@ -702,10 +702,14 @@ static std::tuple executeQueryImpl( /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } + else if (settings.dialect == Dialect::prql && !internal) + { + ParserPRQLQuery parser(max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } else { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); - /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 4c85ea79a63..271ab39cd88 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -45,21 +45,11 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p } #ifndef WITHOUT_TEXT_LOG -void Loggers::setTextLog(std::shared_ptr log, int max_priority) -{ - text_log = log; - text_log_max_priority = max_priority; -} +constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500; #endif void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { -#ifndef WITHOUT_TEXT_LOG - if (split) - if (auto log = text_log.lock()) - split->addTextLog(log, text_log_max_priority); -#endif - auto current_logger = config.getString("logger", ""); if (config_logger.has_value() && *config_logger == current_logger) return; @@ -276,6 +266,16 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } } +#ifndef WITHOUT_TEXT_LOG + if (config.has("text_log")) + { + String text_log_level_str = config.getString("text_log.level", "trace"); + int text_log_level = Poco::Logger::parseLevel(text_log_level_str); + size_t flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds", + DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); + split->addTextLog(DB::TextLog::getLogQueue(flush_interval_milliseconds), text_log_level); + } +#endif } void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger) diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index ebc10954b94..9eff731a4c5 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -7,12 +7,6 @@ #include #include "OwnSplitChannel.h" -#ifndef WITHOUT_TEXT_LOG -namespace DB -{ - class TextLog; -} -#endif namespace Poco::Util { @@ -29,9 +23,6 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); -#ifndef WITHOUT_TEXT_LOG - void setTextLog(std::shared_ptr log, int max_priority); -#endif private: Poco::AutoPtr log_file; @@ -41,10 +32,6 @@ private: /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed. std::optional config_logger; -#ifndef WITHOUT_TEXT_LOG - std::weak_ptr text_log; - int text_log_max_priority = -1; -#endif Poco::AutoPtr split; }; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index 03db198c305..b5ac42d6041 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -135,13 +135,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) elem.source_line = msg.getSourceLine(); elem.message_format_string = msg.getFormatString(); - std::shared_ptr text_log_locked{}; - { - std::lock_guard lock(text_log_mutex); - text_log_locked = text_log.lock(); - } + std::shared_ptr> text_log_locked{}; + text_log_locked = text_log.lock(); if (text_log_locked) - text_log_locked->add(elem); + text_log_locked->push(elem); } #endif } @@ -153,10 +150,9 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std } #ifndef WITHOUT_TEXT_LOG -void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) +void OwnSplitChannel::addTextLog(std::shared_ptr> log_queue, int max_priority) { - std::lock_guard lock(text_log_mutex); - text_log = log; + text_log = log_queue; text_log_max_priority.store(max_priority, std::memory_order_relaxed); } #endif diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 80305c1ccee..a6ee8af5b14 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -10,7 +10,9 @@ #ifndef WITHOUT_TEXT_LOG namespace DB { - class TextLog; + template class SystemLogQueue; + struct TextLogElement; + using TextLogQueue = SystemLogQueue; } #endif @@ -31,7 +33,7 @@ public: void addChannel(Poco::AutoPtr channel, const std::string & name); #ifndef WITHOUT_TEXT_LOG - void addTextLog(std::shared_ptr log, int max_priority); + void addTextLog(std::shared_ptr log_queue, int max_priority); #endif void setLevel(const std::string & name, int level); @@ -45,10 +47,8 @@ private: using ExtendedChannelPtrPair = std::pair; std::map channels; - std::mutex text_log_mutex; - #ifndef WITHOUT_TEXT_LOG - std::weak_ptr text_log; + std::weak_ptr text_log; std::atomic text_log_max_priority = -1; #endif }; diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index d5cf2bd4784..d74137f8a91 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,8 +4,12 @@ add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./PRQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +if (TARGET ch_rust::prql) + target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) +endif () if (USE_DEBUG_HELPERS) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 0138372ce89..bf4d6fc9dec 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -170,7 +170,9 @@ size_t IAST::checkDepthImpl(size_t max_depth) const String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const { WriteBufferFromOwnString buf; - format({buf, one_line, show_secrets}); + FormatSettings settings(buf, one_line); + settings.show_secrets = show_secrets; + format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 7a8ab36518d..d217876459f 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -191,27 +191,39 @@ public: struct FormatSettings { WriteBuffer & ostr; - bool hilite = false; bool one_line; - bool always_quote_identifiers = false; - IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; - bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). + bool hilite; + bool always_quote_identifiers; + IdentifierQuotingStyle identifier_quoting_style; + bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). + char nl_or_ws; /// Newline or whitespace. - // Newline or whitespace. - char nl_or_ws; - - FormatSettings(WriteBuffer & ostr_, bool one_line_, bool show_secrets_ = true) - : ostr(ostr_), one_line(one_line_), show_secrets(show_secrets_) + explicit FormatSettings( + WriteBuffer & ostr_, + bool one_line_, + bool hilite_ = false, + bool always_quote_identifiers_ = false, + IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, + bool show_secrets_ = true) + : ostr(ostr_) + , one_line(one_line_) + , hilite(hilite_) + , always_quote_identifiers(always_quote_identifiers_) + , identifier_quoting_style(identifier_quoting_style_) + , show_secrets(show_secrets_) + , nl_or_ws(one_line ? ' ' : '\n') { - nl_or_ws = one_line ? ' ' : '\n'; } FormatSettings(WriteBuffer & ostr_, const FormatSettings & other) - : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), - always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style), - show_secrets(other.show_secrets) + : ostr(ostr_) + , one_line(other.one_line) + , hilite(other.hilite) + , always_quote_identifiers(other.always_quote_identifiers) + , identifier_quoting_style(other.identifier_quoting_style) + , show_secrets(other.show_secrets) + , nl_or_ws(other.nl_or_ws) { - nl_or_ws = one_line ? ' ' : '\n'; } void writeIdentifier(const String & name) const; diff --git a/src/Parsers/PRQL/ParserPRQLQuery.cpp b/src/Parsers/PRQL/ParserPRQLQuery.cpp new file mode 100644 index 00000000000..b3733b727dc --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.cpp @@ -0,0 +1,86 @@ +#include +#include + +#include "Parsers/Lexer.h" +#include "config.h" + +#if USE_PRQL +# include +#endif + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; + extern const int SUPPORT_IS_DISABLED; +} + +bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserSetQuery set_p; + + if (set_p.parse(pos, node, expected)) + return true; + +#if !USE_PRQL + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "PRQL is not available. Rust code or PRQL itself may be disabled. Use another dialect!"); +#else + const auto * begin = pos->begin; + + // The same parsers are used in the client and the server, so the parser have to detect the end of a single query in case of multiquery queries + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + ++pos; + + const auto * end = pos->begin; + + uint8_t * sql_query_ptr{nullptr}; + uint64_t sql_query_size{0}; + + const auto res + = prql_to_sql(reinterpret_cast(begin), static_cast(end - begin), &sql_query_ptr, &sql_query_size); + + SCOPE_EXIT({ prql_free_pointer(sql_query_ptr); }); + + const auto * sql_query_char_ptr = reinterpret_cast(sql_query_ptr); + const auto * const original_sql_query_ptr = sql_query_char_ptr; + + if (res != 0) + { + throw Exception(ErrorCodes::SYNTAX_ERROR, "PRQL syntax error: '{}'", sql_query_char_ptr); + } + chassert(sql_query_size > 0); + + ParserQuery query_p(end, false); + String error_message; + node = tryParseQuery( + query_p, + sql_query_char_ptr, + sql_query_char_ptr + sql_query_size - 1, + error_message, + false, + "", + false, + max_query_size, + max_parser_depth); + + if (!node) + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Error while parsing the SQL query generated from PRQL query :'{}'.\nPRQL Query:'{}'\nSQL query: '{}'", + error_message, + std::string_view{begin, end}, + std::string_view(original_sql_query_ptr, original_sql_query_ptr + sql_query_size)); + + + return true; +#endif +} +} diff --git a/src/Parsers/PRQL/ParserPRQLQuery.h b/src/Parsers/PRQL/ParserPRQLQuery.h new file mode 100644 index 00000000000..4fc450df6b6 --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ +// Even when PRQL is disabled, it is not possible to exclude this parser because changing the dialect via `SET dialect = '...'` queries should succeed. +// Another solution would be disabling setting the dialect to PRQL, but it requires a lot of finicky conditional compiling around the Dialect setting enum. +// Therefore the decision, for now, is to use this parser even when PRQL is disabled to enable users to switch to another dialect. +class ParserPRQLQuery final : public IParserBase +{ +private: + // These fields are not used when PRQL is disabled at build time. + [[maybe_unused]] size_t max_query_size; + [[maybe_unused]] size_t max_parser_depth; + +public: + ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_} + { + } + + const char * getName() const override { return "PRQL Statement"; } + +protected: + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index fca8ea0aa35..9315279eae6 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -4,18 +4,17 @@ namespace DB { -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line) +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets) { - IAST::FormatSettings settings(buf, one_line); - settings.hilite = hilite; - + IAST::FormatSettings settings(buf, one_line, hilite); + settings.show_secrets = show_secrets; ast.format(settings); } -String serializeAST(const IAST & ast, bool one_line) +String serializeAST(const IAST & ast) { WriteBufferFromOwnString buf; - formatAST(ast, buf, false, one_line); + formatAST(ast, buf, false, true); return buf.str(); } diff --git a/src/Parsers/formatAST.h b/src/Parsers/formatAST.h index 28af2400a4c..dd72a59b4a2 100644 --- a/src/Parsers/formatAST.h +++ b/src/Parsers/formatAST.h @@ -8,12 +8,13 @@ namespace DB class WriteBuffer; -/** Takes a syntax tree and turns it back into text. - * In case of INSERT query, the data will be missing. - */ -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false); +/// Takes a syntax tree and turns it into text. +/// Intended for pretty-printing (multi-line + hiliting). +/// In case of INSERT query, the data will be missing. +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true); -String serializeAST(const IAST & ast, bool one_line = true); +/// Like formatAST() but intended for serialization w/o pretty-printing (single-line, no hiliting). +String serializeAST(const IAST & ast); inline WriteBuffer & operator<<(WriteBuffer & buf, const IAST & ast) { diff --git a/src/Parsers/getInsertQuery.cpp b/src/Parsers/getInsertQuery.cpp index 6f52056dfe2..9d111b147bd 100644 --- a/src/Parsers/getInsertQuery.cpp +++ b/src/Parsers/getInsertQuery.cpp @@ -19,9 +19,7 @@ std::string getInsertQuery(const std::string & db_name, const std::string & tabl query.columns->children.emplace_back(std::make_shared(column.name)); WriteBufferFromOwnString buf; - IAST::FormatSettings settings(buf, true); - settings.always_quote_identifiers = true; - settings.identifier_quoting_style = quoting; + IAST::FormatSettings settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true, /*identifier_quoting_style*/ quoting); query.IAST::format(settings); return buf.str(); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 2795de64b1d..d77ae8d3a27 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,10 @@ TEST_P(ParserTest, parseQuery) if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); } else { @@ -75,7 +79,10 @@ TEST_P(ParserTest, parseQuery) } else { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); } } } @@ -476,3 +483,22 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); + +static constexpr size_t kDummyMaxQuerySize = 256 * 1024; +static constexpr size_t kDummyMaxParserDepth = 256; + +INSTANTIATE_TEST_SUITE_P( + ParserPRQL, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth)), + ::testing::ValuesIn(std::initializer_list{ + { + "from albums\ngroup [author_id] (\n aggregate [first_pushlied = min published]\n)\njoin a=author side:left [==author_id]\njoin p=purchases side:right [==author_id]\ngroup [a.id, p.purchase_id] (\n aggregate [avg_sell = min first_pushlied]\n)", + "WITH table_1 AS\n (\n SELECT\n MIN(published) AS _expr_0,\n author_id\n FROM albums\n GROUP BY author_id\n )\nSELECT\n a.id,\n p.purchase_id,\n MIN(table_0._expr_0) AS avg_sell\nFROM table_1 AS table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n a.id,\n p.purchase_id", + }, + { + "from matches\nfilter start_date > @2023-05-30 # Some comment here\nderive [\n some_derived_value_1 = a + (b ?? 0), # And there\n some_derived_value_2 = c + some_derived_value\n]\nfilter some_derived_value_2 > 0\ngroup [country, city] (\n aggregate [\n average some_derived_value_2,\n aggr = max some_derived_value_2,\n ]\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort [aggr, -country]\ntake 1..20", + "WITH\n table_3 AS\n (\n SELECT\n country,\n city,\n c + some_derived_value AS _expr_1\n FROM matches\n WHERE start_date > toDate('2023-05-30')\n ),\n table_1 AS\n (\n SELECT\n country,\n city,\n AVG(_expr_1) AS _expr_0,\n MAX(_expr_1) AS aggr\n FROM table_3 AS table_2\n WHERE _expr_1 > 0\n GROUP BY\n country,\n city\n )\nSELECT\n country,\n city,\n _expr_0,\n aggr,\n CONCAT(city, ' in ', country) AS place,\n LEFT(country, 2) AS country_code\nFROM table_1 AS table_0\nORDER BY\n aggr ASC,\n country DESC\nLIMIT 20", + }, + }))); diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index 22484727ea2..c0a975f7a38 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -155,7 +155,7 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -201,7 +201,7 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -288,7 +288,7 @@ TEST(ParserDictionaryDDL, Formatting) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - auto str = serializeAST(*create, true); + auto str = serializeAST(*create); EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); } @@ -303,7 +303,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop1->is_dictionary); EXPECT_EQ(drop1->getDatabase(), "test"); EXPECT_EQ(drop1->getTable(), "dict1"); - auto str1 = serializeAST(*drop1, true); + auto str1 = serializeAST(*drop1); EXPECT_EQ(input1, str1); String input2 = "DROP DICTIONARY IF EXISTS dict2"; @@ -314,7 +314,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop2->is_dictionary); EXPECT_EQ(drop2->getDatabase(), ""); EXPECT_EQ(drop2->getTable(), "dict2"); - auto str2 = serializeAST(*drop2, true); + auto str2 = serializeAST(*drop2); EXPECT_EQ(input2, str2); } diff --git a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index d0ce8f2c897..a4c3ed86182 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -51,8 +51,7 @@ void compare(const String & expected, const String & query) ASTPtr ast = parseQuery(parser, query, 0, 0); WriteBufferFromOwnString write_buffer; - IAST::FormatSettings settings(write_buffer, true); - settings.hilite = true; + IAST::FormatSettings settings(write_buffer, true, true); ast->format(settings); ASSERT_PRED2(HiliteComparator::are_equal_with_hilites_removed, expected, write_buffer.str()); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 7da10a8523b..e495b0967e9 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -542,7 +542,8 @@ void trySetStorageInTableJoin(const QueryTreeNodePtr & table_expression, std::sh if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) return; - if (auto storage_dictionary = std::dynamic_pointer_cast(storage); storage_dictionary) + if (auto storage_dictionary = std::dynamic_pointer_cast(storage); + storage_dictionary && storage_dictionary->getDictionary()->getSpecialKeyType() != DictionarySpecialKeyType::Range) table_join->setStorageJoin(std::dynamic_pointer_cast(storage_dictionary->getDictionary())); else if (auto storage_key_value = std::dynamic_pointer_cast(storage); storage_key_value) table_join->setStorageJoin(storage_key_value); diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 413872d512d..f50e45db644 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -114,16 +114,20 @@ private: using Chunks = std::vector; -/// ChunkOffsets marks offsets of different sub-chunks, which will be used by async inserts. -class ChunkOffsets : public ChunkInfo +/// AsyncInsert needs two kinds of information: +/// - offsets of different sub-chunks +/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. +class AsyncInsertInfo : public ChunkInfo { public: - ChunkOffsets() = default; - explicit ChunkOffsets(const std::vector & offsets_) : offsets(offsets_) {} + AsyncInsertInfo() = default; + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + std::vector offsets; + std::vector tokens; }; -using ChunkOffsetsPtr = std::shared_ptr; +using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. class ChunkMissingValues : public ChunkInfo diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index 4beffbcf869..b7adaa35335 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -75,7 +75,7 @@ public: { if (!allow_missing_columns) throw Exception( - ErrorCodes::THERE_IS_NO_COLUMN, "Not found field({}) in arrow schema:{}.", named_col.name, schema.ToString()); + ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the following Arrow schema:\n{}\n", named_col.name, schema.ToString()); else continue; } @@ -168,4 +168,3 @@ private: }; } #endif - diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 4cd73cb23b5..a7efc823fbb 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -934,6 +936,27 @@ private: Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(url.getHost()); + if (!url.getUserInfo().empty()) + { + Poco::Net::HTTPCredentials http_credentials; + Poco::Net::HTTPBasicCredentials http_basic_credentials; + + http_credentials.fromUserInfo(url.getUserInfo()); + + std::string decoded_username; + Poco::URI::decode(http_credentials.getUsername(), decoded_username); + http_basic_credentials.setUsername(decoded_username); + + if (!http_credentials.getPassword().empty()) + { + std::string decoded_password; + Poco::URI::decode(http_credentials.getPassword(), decoded_password); + http_basic_credentials.setPassword(decoded_password); + } + + http_basic_credentials.authenticate(request); + } + auto session = makePooledHTTPSession(url, timeouts, 1); session->sendRequest(request); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index a4f779076eb..ac5da172210 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -13,7 +13,8 @@ namespace ErrorCodes extern const int CANNOT_SKIP_UNKNOWN_FIELD; } -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) +template +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( header, in_, @@ -22,16 +23,17 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & heade with_names_, with_types_, format_settings_, - std::make_unique(in_, format_settings_)) + std::make_unique>(in_, format_settings_)) { } - -BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) +template +BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) { } -std::vector BinaryFormatReader::readHeaderRow() +template +std::vector BinaryFormatReader::readHeaderRow() { std::vector fields; String field; @@ -43,13 +45,15 @@ std::vector BinaryFormatReader::readHeaderRow() return fields; } -std::vector BinaryFormatReader::readNames() +template +std::vector BinaryFormatReader::readNames() { readVarUInt(read_columns, *in); return readHeaderRow(); } -std::vector BinaryFormatReader::readTypes() +template +std::vector BinaryFormatReader::readTypes() { auto types = readHeaderRow(); for (const auto & type_name : types) @@ -57,26 +61,40 @@ std::vector BinaryFormatReader::readTypes() return types; } -bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) +template +bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) { + if constexpr (with_defaults) + { + UInt8 is_default; + readBinary(is_default, *in); + if (is_default) + { + column.insertDefault(); + return false; + } + } serialization->deserializeBinary(column, *in, format_settings); return true; } -void BinaryFormatReader::skipHeaderRow() +template +void BinaryFormatReader::skipHeaderRow() { String tmp; for (size_t i = 0; i < read_columns; ++i) readStringBinary(tmp, *in); } -void BinaryFormatReader::skipNames() +template +void BinaryFormatReader::skipNames() { readVarUInt(read_columns, *in); skipHeaderRow(); } -void BinaryFormatReader::skipTypes() +template +void BinaryFormatReader::skipTypes() { if (read_columns == 0) { @@ -87,7 +105,8 @@ void BinaryFormatReader::skipTypes() skipHeaderRow(); } -void BinaryFormatReader::skipField(size_t file_column) +template +void BinaryFormatReader::skipField(size_t file_column) { if (file_column >= read_data_types.size()) throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, @@ -111,12 +130,21 @@ void registerInputFormatRowBinary(FormatFactory & factory) const IRowInputFormat::Params & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, with_names, with_types, settings); + return std::make_shared>(buf, sample, params, with_names, with_types, settings); }); }; registerWithNamesAndTypes("RowBinary", register_func); factory.registerFileExtension("bin", "RowBinary"); + + factory.registerInputFormat("RowBinaryWithDefaults", []( + ReadBuffer & buf, + const Block & sample, + const IRowInputFormat::Params & params, + const FormatSettings & settings) + { + return std::make_shared>(buf, sample, params, false, false, settings); + }); } void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) @@ -125,6 +153,8 @@ void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) { return std::make_shared(buf, settings); }); + + } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 3d3d80f1043..6f2042d1315 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -12,6 +12,7 @@ class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. */ +template class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: @@ -25,6 +26,7 @@ public: std::string getDiagnosticInfo() override { return {}; } }; +template class BinaryFormatReader final : public FormatWithNamesAndTypesReader { public: @@ -54,7 +56,7 @@ public: BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); private: - BinaryFormatReader reader; + BinaryFormatReader reader; }; } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 79ce2549b4d..244b906549e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -315,17 +316,54 @@ bool CSVFormatReader::readField( return false; } + if (format_settings.csv.use_default_on_bad_values) + return readFieldOrDefault(column, type, serialization); + return readFieldImpl(*buf, column, type, serialization); +} + +bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); } /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); + serialization->deserializeTextCSV(column, istr, format_settings); return true; } +bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ + String field; + readCSVField(field, *buf, format_settings.csv); + ReadBufferFromString tmp_buf(field); + bool is_bad_value = false; + bool res = false; + + size_t col_size = column.size(); + try + { + res = readFieldImpl(tmp_buf, column, type, serialization); + /// Check if we parsed the whole field successfully. + if (!field.empty() && !tmp_buf.eof()) + is_bad_value = true; + } + catch (const Exception &) + { + is_bad_value = true; + } + + if (!is_bad_value) + return res; + + if (column.size() == col_size + 1) + column.popBack(1); + column.insertDefault(); + return false; +} + void CSVFormatReader::skipPrefixBeforeHeader() { for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 8ccf04feed3..7b1a1fc433d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,8 @@ public: void setReadBuffer(ReadBuffer & in_) override; FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); + bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); protected: PeekableReadBuffer * buf; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 5d438d47de6..06efe0a20aa 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -177,6 +177,14 @@ private: if (function.name == "lambda") return; + /// Parsing of INTERVALs is quite hacky. Expressions are rewritten during parsing like this: + /// "now() + interval 1 day" -> "now() + toIntervalDay(1)" + /// "select now() + INTERVAL '1 day 1 hour 1 minute'" -> "now() + (toIntervalDay(1), toIntervalHour(1), toIntervalMinute(1))" + /// so the AST is completely different from the original expression . + /// Avoid extracting these literals and simply compare tokens. It makes the template less flexible but much simpler. + if (function.name.starts_with("toInterval")) + return; + FunctionOverloadResolverPtr builder = FunctionFactory::instance().get(function.name, context); /// Do not replace literals which must be constant ColumnNumbers dont_visit_children = builder->getArgumentsThatAreAlwaysConstant(); @@ -350,6 +358,31 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & } +String ConstantExpressionTemplate::TemplateStructure::dumpTemplate() const +{ + WriteBufferFromOwnString res; + + size_t cur_column = 0; + size_t cur_token = 0; + size_t num_columns = literals.columns(); + while (cur_column < num_columns) + { + size_t skip_tokens_until = token_after_literal_idx[cur_column]; + while (cur_token < skip_tokens_until) + res << quote << tokens[cur_token++] << ", "; + + const DataTypePtr & type = literals.getByPosition(cur_column).type; + res << type->getName() << ", "; + ++cur_column; + } + + while (cur_token < tokens.size()) + res << quote << tokens[cur_token++] << ", "; + + res << "eof"; + return res.str(); +} + size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index fbb3cbcd22a..71d0d0f7134 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -31,6 +31,8 @@ class ConstantExpressionTemplate : boost::noncopyable static size_t getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, bool null_as_default, const String & salt); + String dumpTemplate() const; + String result_column_name; std::vector tokens; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index d61e723fd75..3a65a6fe4ea 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -474,6 +475,10 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx context, &found_in_cache, delimiter); + + LOG_TEST(&Poco::Logger::get("ValuesBlockInputFormat"), "Will use an expression template to parse column {}: {}", + column_idx, structure->dumpTemplate()); + templates[column_idx].emplace(structure); if (found_in_cache) ++attempts_to_deduce_template_cached[column_idx]; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 9b9cc221ca8..b251eec2d28 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -72,14 +72,10 @@ std::unique_ptr createLocalPlan( if (coordinator) { new_context->parallel_reading_coordinator = coordinator; - new_context->getClientInfo().interface = ClientInfo::Interface::LOCAL; - new_context->getClientInfo().collaborate_with_initiator = true; - new_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - new_context->getClientInfo().count_participating_replicas = replica_count; - new_context->getClientInfo().number_of_current_replica = replica_num; - new_context->getClientInfo().connection_client_version_major = DBMS_VERSION_MAJOR; - new_context->getClientInfo().connection_client_version_minor = DBMS_VERSION_MINOR; - new_context->getClientInfo().connection_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; + new_context->setClientInterface(ClientInfo::Interface::LOCAL); + new_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + new_context->setReplicaInfo(true, replica_count, replica_num); + new_context->setConnectionClientVersion(DBMS_VERSION_MAJOR, DBMS_VERSION_MINOR, DBMS_VERSION_PATCH, DBMS_TCP_PROTOCOL_VERSION); new_context->setParallelReplicasGroupUUID(group_uuid); new_context->setMergeTreeAllRangesCallback([coordinator](InitialAllRangesAnnouncement announcement) { diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp index c9cf46aaeca..787a106200a 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp @@ -8,7 +8,7 @@ namespace DB { -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag) +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity) { using Parents = std::set; std::unordered_map inner_parents; @@ -75,7 +75,12 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } /// A node from found match may be nullptr. /// It means that node is visited, but no match was found. - frame.mapped_children.push_back(it->second.node); + if (it->second.monotonicity) + /// Ignore a match with monotonicity. + frame.mapped_children.push_back(nullptr); + else + frame.mapped_children.push_back(it->second.node); + } if (frame.mapped_children.size() < frame.node->children.size()) @@ -182,7 +187,7 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } } - if (!match.node && frame.node->function_base->hasInformationAboutMonotonicity()) + if (!match.node && check_monotonicity && frame.node->function_base->hasInformationAboutMonotonicity()) { size_t num_const_args = 0; const ActionsDAG::Node * monotonic_child = nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h index dd689cba46b..223fc40e33f 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h @@ -39,5 +39,5 @@ struct MatchedTrees using Matches = std::unordered_map; }; -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag); +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity = true); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index ca8a412bf2e..3352567943a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -138,8 +138,11 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); - const auto & sampling_columns = sampling_key.sample_block.getColumnsWithTypeAndName(); - required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_columns.begin(), sampling_columns.end()); + const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes(); + for (const auto & column : sampling_source_columns) + required_columns_after_filter.push_back(ColumnWithTypeAndName(column.type, column.name)); + const auto & sampling_result_columns = sampling_key.sample_block.getColumnsWithTypeAndName(); + required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_result_columns.begin(), sampling_result_columns.end()); } const auto & storage = storage_snapshot->storage; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 6cb76d540f7..b13dda9a8f0 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -114,6 +114,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { + /// NOTE: optimizePrewhere can modify the stack. + optimizePrewhere(stack, nodes); + optimizePrimaryKeyCondition(stack); + { /// NOTE: frame cannot be safely used after stack was modified. auto & frame = stack.back(); @@ -125,6 +129,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); + /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); @@ -148,6 +153,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) { + /// Projection optimization relies on PK optimization if (optimizeUseNormalProjections(stack, nodes)) { ++num_applied_projection; @@ -164,9 +170,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } - /// NOTE: optimizePrewhere can modify the stack. - optimizePrewhere(stack, nodes); - optimizePrimaryKeyCondition(stack); enableMemoryBoundMerging(*stack.back().node, nodes); stack.pop_back(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..81ce4007214 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -287,7 +287,7 @@ ActionsDAGPtr analyzeAggregateProjection( { auto proj_index = buildDAGIndex(*info.before_aggregation); - MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag); + MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag, false /* check_monotonicity */); // for (const auto & [node, match] : matches) // { @@ -497,6 +497,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); + // minmax_count_projection cannot be used used when there is no data to process, because + // it will produce incorrect result during constant aggregation. + // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (block) { MinMaxProjectionCandidate minmax; @@ -625,8 +628,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); - projection_reading = std::make_unique(std::move(pipe)); - + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name, + }); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) reading->resetParts(std::move(candidates.minmax_projection->normal_parts)); @@ -658,7 +669,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & { auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..f6ace6f8025 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -183,7 +183,16 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!projection_reading) { Pipe pipe(std::make_shared(proj_snapshot->getSampleBlockForColumns(required_columns))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index cb76ffa84ba..7ddda29cad4 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -131,7 +131,8 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (prewhere_info->prewhere_actions) { appendExpression(prewhere_info->prewhere_actions); - if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) + if (const auto * filter_expression + = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) filter_nodes.push_back(filter_expression); else return false; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 1e9ab67c8fe..35daccad115 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -38,7 +38,6 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea /// This is a common DAG which is a merge of DAGs from Filter and Expression steps chain. /// Additionally, for all the Filter steps, we collect filter conditions into filter_nodes. -/// Flag remove_last_filter_node is set in case if the last step is a Filter step and it should remove filter column. struct QueryDAG { ActionsDAGPtr dag; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 13de5d1d140..2d2412f7e36 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1761,6 +1761,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); } context->getQueryContext()->addQueryAccessInfo(partition_names); + + if (storage_snapshot->projection) + context->getQueryContext()->addQueryAccessInfo( + Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index 7446203ec35..a24c4dbe4d0 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -4,14 +4,19 @@ namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) + , context(std::move(context_)) + , qualified_projection_name(std::move(qualified_projection_name_)) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + if (context && context->hasQueryContext()) + context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); + for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 05e3ebd5102..2606f501009 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include @@ -9,7 +11,8 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_); + explicit ReadFromPreparedSource( + Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {}); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,6 +21,7 @@ public: protected: Pipe pipe; ContextPtr context; + Context::QualifiedProjectionName qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index ed740e3e242..5cc13f45df4 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -86,9 +86,7 @@ static String formattedAST(const ASTPtr & ast) return {}; WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.hilite = false; - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); ast->format(ast_format_settings); return buf.str(); } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index 88f02a3926f..3a6595ea4fb 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -73,7 +73,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) "Constraint expression returns nullable column that contains null value", backQuote(constraint_ptr->name), table_id.getNameForLogs(), - serializeAST(*(constraint_ptr->expr), true)); + serializeAST(*(constraint_ptr->expr))); result_column = nested_column; } @@ -116,7 +116,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1, - serializeAST(*(constraint_ptr->expr), true), + serializeAST(*(constraint_ptr->expr)), column_values_msg); } } diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 05fddc35e15..63a9c3924a2 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -35,9 +35,20 @@ FinishSortingTransform::FinishSortingTransform( "Can't finish sorting. SortDescription " "of already sorted stream is not prefix of SortDescription needed to sort"); + /// Remove constants from description_sorted_. + SortDescription description_sorted_without_constants; + description_sorted_without_constants.reserve(description_sorted_.size()); + size_t num_columns = const_columns_to_remove.size(); + for (const auto & column_description : description_sorted_) + { + auto pos = header.getPositionByName(column_description.column_name); + + if (pos < num_columns && !const_columns_to_remove[pos]) + description_sorted_without_constants.push_back(column_description); + } /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. - for (const auto & column_sort_desc : description_sorted_) + for (const auto & column_sort_desc : description_sorted_without_constants) description_with_positions.emplace_back(column_sort_desc, header_without_constants.getPositionByName(column_sort_desc.column_name)); } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 49b90d04b81..4e7868ea1c2 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -189,7 +189,6 @@ void JoiningTransform::transform(Chunk & chunk) } else block = readExecute(chunk); - auto num_rows = block.rows(); chunk.setColumns(block.getColumns(), num_rows); } @@ -311,8 +310,11 @@ void FillingRightJoinSideTransform::work() } -DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(Block output_header) - : IProcessor(InputPorts{Block()}, OutputPorts{output_header}) +DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform( + Block output_header_, + NonJoinedStreamBuilder non_joined_stream_builder_) + : IProcessor(InputPorts{Block()}, OutputPorts{output_header_}) + , non_joined_stream_builder(std::move(non_joined_stream_builder_)) { } @@ -365,6 +367,7 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else @@ -372,7 +375,8 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() input.setNotNeeded(); } - if (task->finished) + // When delayed_blocks is nullptr, it means that all buckets have been joined. + if (!task->delayed_blocks) { input.close(); output.finish(); @@ -387,11 +391,21 @@ void DelayedJoinedBlocksWorkerTransform::work() if (!task) return; - Block block = task->delayed_blocks->next(); - + Block block; + /// All joined and non-joined rows from left stream are emitted, only right non-joined rows are left + if (!task->delayed_blocks->isFinished()) + { + block = task->delayed_blocks->next(); + if (!block) + block = nextNonJoinedBlock(); + } + else + { + block = nextNonJoinedBlock(); + } if (!block) { - task.reset(); + resetTask(); return; } @@ -400,6 +414,30 @@ void DelayedJoinedBlocksWorkerTransform::work() output_chunk.setColumns(block.getColumns(), rows); } +void DelayedJoinedBlocksWorkerTransform::resetTask() +{ + task.reset(); + non_joined_delayed_stream = nullptr; +} + +Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock() +{ + // Before read from non-joined stream, all blocks in left file reader must have been joined. + // For example, in HashJoin, it may return invalid mismatch rows from non-joined stream before + // the all blocks in left file reader have been finished, since the used flags are incomplete. + // To make only one processor could read from non-joined stream seems be a easy way. + if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast()) + { + non_joined_delayed_stream = non_joined_stream_builder(); + } + + if (non_joined_delayed_stream) + { + return non_joined_delayed_stream->next(); + } + return {}; +} + DelayedJoinedBlocksTransform::DelayedJoinedBlocksTransform(size_t num_streams, JoinPtr join_) : IProcessor(InputPorts{}, OutputPorts(num_streams, Block())) , join(std::move(join_)) @@ -433,6 +471,9 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (finished) { + // Since have memory limit, cannot handle all buckets parallelly by different + // DelayedJoinedBlocksWorkerTransform. So send the same task to all outputs. + // Wait for all DelayedJoinedBlocksWorkerTransform be idle before getting next bucket. for (auto & output : outputs) { if (output.isFinished()) @@ -448,10 +489,14 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (delayed_blocks) { + // This counter is used to ensure that only the last DelayedJoinedBlocksWorkerTransform + // could read right non-joined blocks from the join. + auto left_delayed_stream_finished_counter = std::make_shared(outputs.size()); for (auto & output : outputs) { Chunk chunk; - chunk.setChunkInfo(std::make_shared(delayed_blocks)); + auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index e7edff40c56..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -115,12 +115,16 @@ class DelayedBlocksTask : public ChunkInfo { public: - explicit DelayedBlocksTask() : finished(true) {} - explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_) : delayed_blocks(std::move(delayed_blocks_)) {} + DelayedBlocksTask() = default; + explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) + : delayed_blocks(std::move(delayed_blocks_)) + , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) + { + } IBlocksStreamPtr delayed_blocks = nullptr; + JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr; - bool finished = false; }; using DelayedBlocksTaskPtr = std::shared_ptr; @@ -147,7 +151,10 @@ private: class DelayedJoinedBlocksWorkerTransform : public IProcessor { public: - explicit DelayedJoinedBlocksWorkerTransform(Block output_header); + using NonJoinedStreamBuilder = std::function; + explicit DelayedJoinedBlocksWorkerTransform( + Block output_header_, + NonJoinedStreamBuilder non_joined_stream_builder_); String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; } @@ -157,8 +164,12 @@ public: private: DelayedBlocksTaskPtr task; Chunk output_chunk; + /// For building a block stream to access the non-joined rows. + NonJoinedStreamBuilder non_joined_stream_builder; + IBlocksStreamPtr non_joined_delayed_stream = nullptr; - bool finished = false; + void resetTask(); + Block nextNonJoinedBlock(); }; } diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index dedf85e409c..553b18dd57b 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -491,7 +491,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (delayed_root) { // Process delayed joined blocks when all JoiningTransform are finished. - auto delayed = std::make_shared(joined_header); + auto delayed = std::make_shared( + joined_header, + [left_header, joined_header, max_block_size, join]() + { return join->getNonJoinedBlocks(left_header, joined_header, max_block_size); }); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index bf9ba20a5cf..67d30012b0e 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -798,7 +798,7 @@ namespace /// Authentication. session.emplace(iserver.context(), ClientInfo::Interface::GRPC); session->authenticate(user, password, user_address); - session->getClientInfo().quota_key = quota_key; + session->setQuotaClientKey(quota_key); ClientInfo client_info = session->getClientInfo(); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f7cdb905710..069670c84a5 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -474,7 +474,6 @@ bool HTTPHandler::authenticateUser( } /// Set client info. It will be used for quota accounting parameters in 'setUser' method. - ClientInfo & client_info = session->getClientInfo(); ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; if (request.getMethod() == HTTPServerRequest::HTTP_GET) @@ -482,15 +481,13 @@ bool HTTPHandler::authenticateUser( else if (request.getMethod() == HTTPServerRequest::HTTP_POST) http_method = ClientInfo::HTTPMethod::POST; - client_info.http_method = http_method; - client_info.http_user_agent = request.get("User-Agent", ""); - client_info.http_referer = request.get("Referer", ""); - client_info.forwarded_for = request.get("X-Forwarded-For", ""); - client_info.quota_key = quota_key; + session->setHttpClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", "")); + session->setForwardedFor(request.get("X-Forwarded-For", "")); + session->setQuotaClientKey(quota_key); /// Extract the last entry from comma separated list of forwarded_for addresses. /// Only the last proxy can be trusted (if any). - String forwarded_address = client_info.getLastForwardedFor(); + String forwarded_address = session->getClientInfo().getLastForwardedFor(); try { if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) @@ -988,22 +985,22 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse } // Parse the OpenTelemetry traceparent header. - ClientInfo& client_info = session->getClientInfo(); + auto & client_trace_context = session->getClientTraceContext(); if (request.has("traceparent")) { std::string opentelemetry_traceparent = request.get("traceparent"); std::string error; - if (!client_info.client_trace_context.parseTraceparentHeader(opentelemetry_traceparent, error)) + if (!client_trace_context.parseTraceparentHeader(opentelemetry_traceparent, error)) { LOG_DEBUG(log, "Failed to parse OpenTelemetry traceparent header '{}': {}", opentelemetry_traceparent, error); } - client_info.client_trace_context.tracestate = request.get("tracestate", ""); + client_trace_context.tracestate = request.get("tracestate", ""); } // Setup tracing context for this thread auto context = session->sessionOrGlobalContext(); thread_trace_context = std::make_unique("HTTPHandler", - client_info.client_trace_context, + client_trace_context, context->getSettingsRef(), context->getOpenTelemetrySpanLog()); thread_trace_context->root_span.kind = OpenTelemetry::SERVER; diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 7318b0ad89b..f98b86e6cf8 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -94,7 +94,7 @@ void MySQLHandler::run() session = std::make_unique(server.context(), ClientInfo::Interface::MYSQL); SCOPE_EXIT({ session.reset(); }); - session->getClientInfo().connection_id = connection_id; + session->setClientConnectionId(connection_id); in = std::make_shared(socket()); out = std::make_shared(socket()); diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 36b05932979..7b078154252 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -58,7 +58,7 @@ void PostgreSQLHandler::run() session = std::make_unique(server.context(), ClientInfo::Interface::POSTGRESQL); SCOPE_EXIT({ session.reset(); }); - session->getClientInfo().connection_id = connection_id; + session->setClientConnectionId(connection_id); try { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 36566832ebc..a747f06f1ce 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1177,21 +1177,12 @@ std::unique_ptr TCPHandler::makeSession() auto res = std::make_unique(server.context(), interface, socket().secure(), certificate); - auto & client_info = res->getClientInfo(); - client_info.forwarded_for = forwarded_for; - client_info.client_name = client_name; - client_info.client_version_major = client_version_major; - client_info.client_version_minor = client_version_minor; - client_info.client_version_patch = client_version_patch; - client_info.client_tcp_protocol_version = client_tcp_protocol_version; - - client_info.connection_client_version_major = client_version_major; - client_info.connection_client_version_minor = client_version_minor; - client_info.connection_client_version_patch = client_version_patch; - client_info.connection_tcp_protocol_version = client_tcp_protocol_version; - - client_info.quota_key = quota_key; - client_info.interface = interface; + res->setForwardedFor(forwarded_for); + res->setClientName(client_name); + res->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + res->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + res->setQuotaClientKey(quota_key); + res->setClientInterface(interface); return res; } @@ -1253,7 +1244,7 @@ void TCPHandler::receiveHello() } session = makeSession(); - auto & client_info = session->getClientInfo(); + const auto & client_info = session->getClientInfo(); #if USE_SSL /// Authentication with SSL user certificate @@ -1286,7 +1277,7 @@ void TCPHandler::receiveAddendum() { readStringBinary(quota_key, *in); if (!is_interserver_mode) - session->getClientInfo().quota_key = quota_key; + session->setQuotaClientKey(quota_key); } } diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index db37ac7c4c3..249ed8be428 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -35,7 +35,7 @@ String ConstraintsDescription::toString() const for (const auto & constraint : constraints) list.children.push_back(constraint); - return serializeAST(list, true); + return serializeAST(list); } ConstraintsDescription ConstraintsDescription::parse(const String & str) diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index a1f35a5ae42..78d69c83989 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -67,7 +67,8 @@ struct HudiMetadataParser::Impl { auto key_file = std::filesystem::path(key); Strings file_parts; - splitInto<'_'>(file_parts, key_file.stem()); + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); if (file_parts.size() != 3) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 875764f7633..0dcdae01ba9 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -132,7 +132,7 @@ DistributedSink::DistributedSink( const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); - context->getClientInfo().distributed_depth += 1; + context->increaseDistributedDepth(); random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..483f0894cc4 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -89,7 +89,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); - num_bytes_to_read = read_until_position - file_offset; + num_bytes_to_read = std::min(read_until_position - file_offset, internal_buffer.size()); } else { diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index a41c65cdb2e..3d7e2b05f5a 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -64,23 +64,131 @@ namespace ErrorCodes } namespace { + /// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency. + std::vector LSWithRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + const String & for_match); + + /* + * When `{...}` has any `/`s, it must be processed in a different way: + * Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...}, + * LSWithFoldedRegexpMatching is in charge from now on. + * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. + * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. + * StorageFile.cpp has the same logic. + */ + std::vector LSWithFoldedRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + const String & processed_suffix, + const String & suffix_with_globs, + re2::RE2 & matcher, + const size_t max_depth, + const size_t next_slash_after_glob_pos) + { + /// We don't need to go all the way in every directory if max_depth is reached + /// as it is upper limit of depth by simply counting `/`s in curly braces + if (!max_depth) + return {}; + + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length); + if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + { + // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. + throw Exception( + ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError())); + } + + std::vector result; + + if (!ls.file_info && ls.length > 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); + + for (int i = 0; i < ls.length; ++i) + { + const String full_path = String(ls.file_info[i].mName); + const size_t last_slash = full_path.rfind('/'); + const String dir_or_file_name = full_path.substr(last_slash); + const bool is_directory = ls.file_info[i].mKind == 'D'; + + if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher)) + { + if (next_slash_after_glob_pos == std::string::npos) + { + result.emplace_back( + String(ls.file_info[i].mName), + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + } + else + { + std::vector result_part = LSWithRegexpMatching( + fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos)); + std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); + } + } + else if (is_directory) + { + std::vector result_part = LSWithFoldedRegexpMatching( + fs::path(full_path), fs, processed_suffix + dir_or_file_name, + suffix_with_globs, matcher, max_depth - 1, next_slash_after_glob_pos); + std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); + } + } + return result; + } + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageFile. */ - std::vector LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match) + std::vector LSWithRegexpMatching( + const String & path_for_ls, + const HDFSFSPtr & fs, + const String & for_match) { - const size_t first_glob = for_match.find_first_of("*?{"); + const size_t first_glob_pos = for_match.find_first_of("*?{"); + const bool has_glob = first_glob_pos != std::string::npos; - const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - const size_t next_slash = suffix_with_globs.find('/', 1); - re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash))); + size_t slashes_in_glob = 0; + const size_t next_slash_after_glob_pos = [&]() + { + if (!has_glob) + return suffix_with_globs.find('/', 1); + + size_t in_curly = 0; + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) + { + if (*it == '{') + ++in_curly; + else if (*it == '/') + { + if (in_curly) + ++slashes_in_glob; + else + return size_t(std::distance(suffix_with_globs.begin(), it)); + } + else if (*it == '}') + --in_curly; + } + return std::string::npos; + }(); + + const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); + + re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); if (!matcher.ok()) throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); + if (slashes_in_glob) + { + return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs, + matcher, slashes_in_glob, next_slash_after_glob_pos); + } + HDFSFileInfo ls; ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); if (ls.file_info == nullptr && errno != ENOENT) // NOLINT @@ -97,7 +205,7 @@ namespace const String full_path = String(ls.file_info[i].mName); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash != std::string::npos; + const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; const bool is_directory = ls.file_info[i].mKind == 'D'; /// Condition with type of current file_info means what kind of path is it in current iteration of ls if (!is_directory && !looking_for_directory) @@ -111,7 +219,7 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash)); + std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos)); /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); } diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index c7aeaf8e4ef..06518a52c61 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -151,7 +151,7 @@ String IndicesDescription::toString() const for (const auto & index : *this) list.children.push_back(index.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 5d77fc080a4..aa8b437263a 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -62,9 +62,10 @@ ColumnsDescription StorageMeiliSearch::getTableStructureFromData(const MeiliSear String convertASTtoStr(ASTPtr ptr) { WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = IdentifierQuotingStyle::BackticksMySQL; - settings.always_quote_identifiers = IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ IdentifierQuotingStyle::BackticksMySQL); ptr->format(settings); return out.str(); } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 23bbc1c7f9d..6a3bf2940e9 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -353,8 +353,14 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) { /// It is important to include Outdated parts here because remote replicas cannot reliably /// determine the local state of the part, so queries for the parts in these states are completely normal. - auto part = data.getPartIfExists( - name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + MergeTreeData::DataPartPtr part; + + /// Ephemeral zero-copy lock may be lost for PreActive parts + bool zero_copy_enabled = data.getSettings()->allow_remote_fs_zero_copy_replication; + if (zero_copy_enabled) + part = data.getPartIfExists(name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + else + part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) return part; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a80e32ff650..af6906e004d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -501,7 +501,7 @@ public: mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; - mutable std::atomic last_removal_attemp_time = 0; + mutable std::atomic last_removal_attempt_time = 0; protected: diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9f54c554c85..883cfee89c8 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -230,7 +230,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() /// the fast replica is not overloaded because amount of executing merges doesn't affect the ability to acquire locks for new merges. /// /// So here we trying to solve it with the simplest solution -- sleep random time up to 500ms for 1GB part and up to 7 seconds for 300GB part. - /// It can sound too much, but we are trying to aquite these locks in background tasks which can be scheduled each 5 seconds or so. + /// It can sound too much, but we are trying to acquire these locks in background tasks which can be scheduled each 5 seconds or so. double start_to_sleep_seconds = std::logf(storage_settings_ptr->zero_copy_merge_mutation_min_parts_size_sleep_before_lock.value); uint64_t right_border_to_sleep_ms = static_cast((std::log(estimated_space_for_merge) - start_to_sleep_seconds + 0.5) * 1000); uint64_t time_to_sleep_milliseconds = std::min(10000UL, std::uniform_int_distribution(1, 1 + right_border_to_sleep_ms)(rng)); @@ -245,7 +245,11 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { - LOG_DEBUG(log, "Merge of part {} started by some other replica, will wait it and fetch merged part", entry.new_part_name); + LOG_DEBUG( + log, + "Merge of part {} started by some other replica, will wait for it and fetch merged part. Number of tries {}", + entry.new_part_name, + entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); /// Don't check for missing part -- it's missing because other replica still not /// finished merge. diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 6eab4337162..e497a799274 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -269,7 +269,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) try { ALLOW_ALLOCATIONS_IN_SCOPE; - item->task->getQueryId(); + query_id = item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index c7434eab05d..d830ba37e71 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -328,11 +328,22 @@ MergeTreeReadTaskColumns getReadTaskColumns( NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { - Names step_column_names = step.actions->getActionsDAG().getRequiredColumnsNames(); + Names step_column_names; + + /// Computation results from previous steps might be used in the current step as well. In such a case these + /// computed columns will be present in the current step inputs. They don't need to be read from the disk so + /// exclude them from the list of columns to read. This filtering must be done before injecting required + /// columns to avoid adding unnecessary columns or failing to find required columns that are computation + /// results from previous steps. + /// Example: step1: sin(a)>b, step2: sin(a)>c + for (const auto & name : step.actions->getActionsDAG().getRequiredColumnsNames()) + if (!columns_from_previous_steps.contains(name)) + step_column_names.push_back(name); injectRequiredColumns( data_part_info_for_reader, storage_snapshot, with_subcolumns, step_column_names); + /// More columns could have been added, filter them as well by the list of columns from previous steps. Names columns_to_read_in_step; for (const auto & name : step_column_names) { @@ -343,6 +354,10 @@ MergeTreeReadTaskColumns getReadTaskColumns( columns_from_previous_steps.insert(name); } + /// Add results of the step to the list of already "known" columns so that we don't read or compute them again. + for (const auto & name : step.actions->getActionsDAG().getNames()) + columns_from_previous_steps.insert(name); + result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, columns_to_read_in_step)); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9c5e45aa488..c24f195c429 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -465,9 +465,10 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, + bool allow_empty_sorting_key, ContextPtr local_context) const { - if (!new_metadata.sorting_key.definition_ast) + if (!new_metadata.sorting_key.definition_ast && !allow_empty_sorting_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); KeyDescription new_sorting_key = new_metadata.sorting_key; @@ -580,6 +581,9 @@ void MergeTreeData::checkProperties( if (projections_names.find(projection.name) != projections_names.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name)); + /// We cannot alter a projection so far. So here we do not try to find a projection in old metadata. + bool is_aggregate = projection.type == ProjectionDescription::Type::Aggregate; + checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, local_context); projections_names.insert(projection.name); } } @@ -593,7 +597,7 @@ void MergeTreeData::setProperties( bool attach, ContextPtr local_context) { - checkProperties(new_metadata, old_metadata, attach, local_context); + checkProperties(new_metadata, old_metadata, attach, false, local_context); setInMemoryMetadata(new_metadata); } @@ -2187,7 +2191,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) const DataPartPtr & part = *it; - part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); + part->last_removal_attempt_time.store(time_now, std::memory_order_relaxed); /// Do not remove outdated part if it may be visible for some transaction if (!part->version.canBeRemoved()) @@ -3286,7 +3290,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - checkProperties(new_metadata, old_metadata, false, local_context); + checkProperties(new_metadata, old_metadata, false, false, local_context); checkTTLExpressions(new_metadata, old_metadata); if (!columns_to_check_conversion.empty()) @@ -7027,7 +7031,9 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg max_added_blocks.get(), query_context); - // minmax_count_projection should not be used when there is no data to process. + // minmax_count_projection cannot be used used when there is no data to process, because + // it will produce incorrect result during constant aggregation. + // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (!query_info.minmax_count_projection_block) return; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8b5b50b1841..28611d09386 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1229,7 +1229,7 @@ protected: /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; - void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr) const; + void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, ContextPtr local_context) const; void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7e306880e9c..ea5d64212f5 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -171,23 +171,23 @@ void MergeTreeDataWriter::TemporaryPart::finalize() projection->getDataPartStorage().precommitTransaction(); } -std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num) +std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr async_insert_info, const IColumn::Selector & selector, size_t partition_num) { - if (nullptr == chunk_offsets) + if (nullptr == async_insert_info) { return {}; } if (selector.empty()) { - return {chunk_offsets}; + return {async_insert_info}; } - std::vector result(partition_num); + std::vector result(partition_num); std::vector last_row_for_partition(partition_num, -1); size_t offset_idx = 0; for (size_t i = 0; i < selector.size(); ++i) { ++last_row_for_partition[selector[i]]; - if (i + 1 == chunk_offsets->offsets[offset_idx]) + if (i + 1 == async_insert_info->offsets[offset_idx]) { for (size_t part_id = 0; part_id < last_row_for_partition.size(); ++part_id) { @@ -196,9 +196,12 @@ std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offs continue; size_t offset = static_cast(last_row + 1); if (result[part_id] == nullptr) - result[part_id] = std::make_shared(); + result[part_id] = std::make_shared(); if (result[part_id]->offsets.empty() || offset > *result[part_id]->offsets.rbegin()) + { result[part_id]->offsets.push_back(offset); + result[part_id]->tokens.push_back(async_insert_info->tokens[offset_idx]); + } } ++offset_idx; } @@ -207,7 +210,7 @@ std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offs } BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( - const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets) + const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info) { BlocksWithPartition result; if (!block || !block.rows()) @@ -218,8 +221,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned. { result.emplace_back(Block(block), Row{}); - if (chunk_offsets != nullptr) - result[0].offsets = std::move(chunk_offsets->offsets); + if (async_insert_info != nullptr) + { + result[0].offsets = std::move(async_insert_info->offsets); + result[0].tokens = std::move(async_insert_info->tokens); + } return result; } @@ -236,7 +242,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( IColumn::Selector selector; buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts); - auto chunk_offsets_with_partition = scatterOffsetsBySelector(chunk_offsets, selector, partition_num_to_first_row.size()); + auto async_insert_info_with_partition = scatterAsyncInsertInfoBySelector(async_insert_info, selector, partition_num_to_first_row.size()); size_t partitions_count = partition_num_to_first_row.size(); result.reserve(partitions_count); @@ -255,8 +261,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( /// NOTE: returning a copy of the original block so that calculated partition key columns /// do not interfere with possible calculated primary key columns of the same name. result.emplace_back(Block(block), get_partition(0)); - if (!chunk_offsets_with_partition.empty()) - result[0].offsets = std::move(chunk_offsets_with_partition[0]->offsets); + if (!async_insert_info_with_partition.empty()) + { + result[0].offsets = std::move(async_insert_info_with_partition[0]->offsets); + result[0].tokens = std::move(async_insert_info_with_partition[0]->tokens); + } return result; } @@ -270,8 +279,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( result[i].block.getByPosition(col).column = std::move(scattered[i]); } - for (size_t i = 0; i < chunk_offsets_with_partition.size(); ++i) - result[i].offsets = std::move(chunk_offsets_with_partition[i]->offsets); + for (size_t i = 0; i < async_insert_info_with_partition.size(); ++i) + { + result[i].offsets = std::move(async_insert_info_with_partition[i]->offsets); + result[i].tokens = std::move(async_insert_info_with_partition[i]->tokens); + } return result; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 795453b2afa..2fb6b1f22d4 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -23,14 +23,15 @@ struct BlockWithPartition Block block; Row partition; std::vector offsets; + std::vector tokens; BlockWithPartition(Block && block_, Row && partition_) : block(block_), partition(std::move(partition_)) { } - BlockWithPartition(Block && block_, Row && partition_, std::vector && offsets_) - : block(block_), partition(std::move(partition_)), offsets(std::move(offsets_)) + BlockWithPartition(Block && block_, Row && partition_, std::vector && offsets_, std::vector && tokens_) + : block(block_), partition(std::move(partition_)), offsets(std::move(offsets_)), tokens(std::move(tokens_)) { } }; @@ -51,7 +52,7 @@ public: * (split rows by partition) * Works deterministically: if same block was passed, function will return same result in same order. */ - static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets = nullptr); + static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info = nullptr); /// This structure contains not completely written temporary part. /// Some writes may happen asynchronously, e.g. for blob storages. diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index cac26c5ac23..4dbccb91620 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP { auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); *out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time) << "\n"; + << "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n"; *out << "commands: "; commands.writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 1620ba98d58..5efb7286685 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -22,6 +22,33 @@ namespace DB /// This is used to assume that condition is likely to have good selectivity. static constexpr auto threshold = 2; +static NameToIndexMap fillNamesPositions(const Names & names) +{ + NameToIndexMap names_positions; + + for (size_t position = 0; position < names.size(); ++position) + { + const auto & name = names[position]; + names_positions[name] = position; + } + + return names_positions; +} + +/// Find minimal position of any of the column in primary key. +static Int64 findMinPosition(const NameSet & condition_table_columns, const NameToIndexMap & primary_key_positions) +{ + Int64 min_position = std::numeric_limits::max() - 1; + + for (const auto & column : condition_table_columns) + { + auto it = primary_key_positions.find(column); + if (it != primary_key_positions.end()) + min_position = std::min(min_position, static_cast(it->second)); + } + + return min_position; +} MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, @@ -35,6 +62,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( , supported_columns{supported_columns_} , sorting_key_names{NameSet( metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())} + , primary_key_names_positions(fillNamesPositions(metadata_snapshot->getPrimaryKey().column_names)) , log{log_} , column_sizes{std::move(column_sizes_)} { @@ -60,6 +88,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons where_optimizer_context.context = context; where_optimizer_context.array_joined_names = determineArrayJoinedNames(select); where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = select.final(); RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/); @@ -89,6 +118,7 @@ std::optional MergeTreeWhe where_optimizer_context.context = context; where_optimizer_context.array_joined_names = {}; where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = is_final; RPNBuilderTreeContext tree_context(context); @@ -234,6 +264,14 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree if (cond.viable) cond.good = isConditionGood(node, table_columns); + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + /// Consider all conditions good with this setting enabled. + cond.good = cond.viable; + /// Find min position in PK of any column that is used in this condition. + cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + } + res.emplace_back(std::move(cond)); } } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 18555a72db1..fb5e84b67c6 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -72,9 +72,14 @@ private: /// Does the condition presumably have good selectivity? bool good = false; + /// Does the condition contain primary key column? + /// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any + /// column in this condition because this condition have bigger chances to be already satisfied by PK analysis. + Int64 min_position_in_primary_key = std::numeric_limits::max() - 1; + auto tuple() const { - return std::make_tuple(!viable, !good, columns_size, table_columns.size()); + return std::make_tuple(!viable, !good, -min_position_in_primary_key, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? @@ -91,6 +96,7 @@ private: ContextPtr context; NameSet array_joined_names; bool move_all_conditions_to_prewhere = false; + bool move_primary_key_columns_to_end_of_prewhere = false; bool is_final = false; }; @@ -141,6 +147,7 @@ private: const Names queried_columns; const std::optional supported_columns; const NameSet sorting_key_names; + const NameToIndexMap primary_key_names_positions; Poco::Logger * log; std::unordered_map column_sizes; UInt64 total_size_of_queried_columns = 0; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 6cb9d50436e..164b541d2b8 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -154,8 +154,12 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { + LOG_DEBUG( + log, + "Mutation of part {} started by some other replica, will wait for it and mutated merged part. Number of tries {}", + entry.new_part_name, + entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); - LOG_DEBUG(log, "Mutation of part {} started by some other replica, will wait it and mutated merged part", entry.new_part_name); return PrepareResult{ .prepared_successfully = false, diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index b4748ee77ea..6ad77119016 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -174,7 +174,7 @@ bool ReplicatedMergeMutateTaskBase::executeImpl() part_log_writer = prepare_result.part_log_writer; - /// Avoid resheduling, execute fetch here, in the same thread. + /// Avoid rescheduling, execute fetch here, in the same thread. if (!prepare_result.prepared_successfully) return execute_fetch(prepare_result.need_to_check_missing_part_in_fetch); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index bcc4dc749fb..07cfced8362 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -492,7 +492,7 @@ size_t ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_di } else { - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, rc); } first_outdated_block++; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index ac956433eab..9eb8b6ce24c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -48,7 +48,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const format_version = std::max(format_version, FORMAT_WITH_LOG_ENTRY_ID); out << "format version: " << format_version << "\n" - << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n" + << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << '\n' << "block_id: " << escape << block_id << '\n'; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 1bbb246338c..e2c23ecfe85 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -12,7 +12,7 @@ namespace DB void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const { out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n" + << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << "\n" << "block numbers count: " << block_numbers.size() << "\n"; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 07f46c07466..80021d9e0eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -497,7 +497,7 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( if (code != Coordination::Error::ZOK) LOG_ERROR(log, "Couldn't set value of nodes for insert times " "({}/min_unprocessed_insert_time, max_processed_insert_time): {}. " - "This shouldn't happen often.", replica_path, Coordination::errorMessage(code)); + "This shouldn't happen often.", replica_path, code); } } @@ -554,7 +554,7 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / entry->znode_name); if (code != Coordination::Error::ZOK) - LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, Coordination::errorMessage(code)); + LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, code); updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } @@ -1147,7 +1147,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / znode_name); if (code != Coordination::Error::ZOK) - LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), Coordination::errorMessage(code)); + LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), code); updateStateOnQueueEntryRemoval( *it, /* is_successful = */ false, @@ -1370,13 +1370,27 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (data_settings->allow_remote_fs_zero_copy_replication) { auto disks = storage.getDisks(); - bool only_s3_storage = true; + DiskPtr disk_with_zero_copy = nullptr; for (const auto & disk : disks) - if (!disk->supportZeroCopyReplication()) - only_s3_storage = false; + { + if (disk->supportZeroCopyReplication()) + { + disk_with_zero_copy = disk; + break; + } + } + /// Technically speaking if there are more than one disk that could store the part (a local hot + cloud cold) + /// It would be possible for the merge to happen concurrently with other replica if the other replica is doing + /// a merge using zero-copy and the cloud storage, and the local replica uses the local storage instead + /// The question is, is it worth keep retrying to do the merge over and over for the opportunity to do + /// double the work? Probably not + /// So what we do is that, even if hot merge could happen, check the zero copy lock anyway. + /// Keep in mind that for the zero copy lock check to happen (via existing_zero_copy_locks) we need to + /// have failed first because of it and added it via watchZeroCopyLock. Considering we've already tried to + /// use cloud storage and zero-copy replication, the most likely scenario is that we'll try again String replica_to_execute_merge; - if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0], replica_to_execute_merge)) + if (disk_with_zero_copy && storage.checkZeroCopyLockExists(entry.new_part_name, disk_with_zero_copy, replica_to_execute_merge)) { constexpr auto fmt_string = "Not executing merge/mutation for the part {}, waiting for {} to execute it and will fetch after."; out_postpone_reason = fmt::format(fmt_string, entry.new_part_name, replica_to_execute_merge); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 22e2ab945eb..0db3464a637 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -78,7 +78,7 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk unmerged_block_with_partition(std::move(unmerged_block_with_partition_)), part_counters(std::move(part_counters_)) { - initBlockIDMap(); + initBlockIDMap(); } void initBlockIDMap() @@ -209,8 +209,8 @@ std::vector testSelfDeduplicate(std::vector data, std::vectorinsert(datum); } Block block({ColumnWithTypeAndName(std::move(column), DataTypePtr(new DataTypeInt64()), "a")}); - - BlockWithPartition block1(std::move(block), Row(), std::move(offsets)); + std::vector tokens(offsets.size()); + BlockWithPartition block1(std::move(block), Row(), std::move(offsets), std::move(tokens)); ProfileEvents::Counters profile_counters; ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition part( &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); @@ -242,22 +242,29 @@ namespace size_t start = 0; auto cols = block.block.getColumns(); std::vector block_id_vec; - for (auto offset : block.offsets) + for (size_t i = 0; i < block.offsets.size(); ++i) { - SipHash hash; - for (size_t i = start; i < offset; ++i) + size_t offset = block.offsets[i]; + std::string_view token = block.tokens[i]; + if (token.empty()) { - for (const auto & col : cols) - col->updateHashWithValue(i, hash); - } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); + SipHash hash; + for (size_t j = start; j < offset; ++j) + { + for (const auto & col : cols) + col->updateHashWithValue(j, hash); + } + union + { + char bytes[16]; + UInt64 words[2]; + } hash_value; + hash.get128(hash_value.bytes); - block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1])); + block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1])); + } + else + block_id_vec.push_back(partition_id + "_" + std::string(token)); start = offset; } @@ -418,18 +425,18 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) convertDynamicColumnsToTuples(block, storage_snapshot); - ChunkOffsetsPtr chunk_offsets; + AsyncInsertInfoPtr async_insert_info; if constexpr (async_insert) { const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * chunk_offsets_ptr = typeid_cast(chunk_info.get())) - chunk_offsets = std::make_shared(chunk_offsets_ptr->offsets); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, chunk_offsets); + auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; @@ -453,7 +460,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { /// we copy everything but offsets which we move because they are only used by async insert if (settings.optimize_on_insert && storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary) - unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets)); + unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets), std::move(current_block.tokens)); } /// Write part to the filesystem under temporary name. Calculate a checksum. @@ -468,7 +475,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - /// TODO consider insert_deduplication_token block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size()); } @@ -723,7 +729,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Insert failed due to zookeeper error. Please retry. Reason: {}", - Coordination::errorMessage(write_part_info_keeper_error)); + write_part_info_keeper_error); } retries_ctl.stopRetries(); @@ -1033,7 +1039,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. Reason: {}", - Coordination::errorMessage(multi_code)); + multi_code); return; } else if (Coordination::isUserError(multi_code)) @@ -1109,7 +1115,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected logical error while adding block {} with ID '{}': {}, path {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code), + multi_code, failed_op_path); } } @@ -1122,7 +1128,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected ZooKeeper error while adding block {} with ID '{}': {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code)); + multi_code); } }, [&zookeeper]() { zookeeper->cleanupEphemeralNodes(); }); diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index e55b04c27b3..512c0800de7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -72,7 +72,7 @@ public: if (!Coordination::isHardwareError(e.code)) throw; - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (...) { @@ -91,16 +91,16 @@ public: } catch (const zkutil::KeeperException & e) { - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (const Exception & e) { - setUserError(e.code(), e.what()); + setUserError(std::current_exception(), e.code(), e.what()); } return false; } - void setUserError(int code, std::string message) + void setUserError(std::exception_ptr exception, int code, std::string message) { if (retries_info.logger) LOG_TRACE( @@ -113,16 +113,28 @@ public: iteration_succeeded = false; user_error.code = code; user_error.message = std::move(message); + user_error.exception = exception; keeper_error = KeeperError{}; } + template + void setUserError(std::exception_ptr exception, int code, fmt::format_string fmt, Args &&... args) + { + setUserError(exception, code, fmt::format(fmt, std::forward(args)...)); + } + + void setUserError(int code, std::string message) + { + setUserError(std::make_exception_ptr(Exception::createDeprecated(message, code)), code, message); + } + template void setUserError(int code, fmt::format_string fmt, Args &&... args) { setUserError(code, fmt::format(fmt, std::forward(args)...)); } - void setKeeperError(Coordination::Error code, std::string message) + void setKeeperError(std::exception_ptr exception, Coordination::Error code, std::string message) { if (retries_info.logger) LOG_TRACE( @@ -135,9 +147,21 @@ public: iteration_succeeded = false; keeper_error.code = code; keeper_error.message = std::move(message); + keeper_error.exception = exception; user_error = UserError{}; } + template + void setKeeperError(std::exception_ptr exception, Coordination::Error code, fmt::format_string fmt, Args &&... args) + { + setKeeperError(exception, code, fmt::format(fmt, std::forward(args)...)); + } + + void setKeeperError(Coordination::Error code, std::string message) + { + setKeeperError(std::make_exception_ptr(zkutil::KeeperException(message, code)), code, message); + } + template void setKeeperError(Coordination::Error code, fmt::format_string fmt, Args &&... args) { @@ -163,12 +187,14 @@ private: using Code = Coordination::Error; Code code = Code::ZOK; std::string message; + std::exception_ptr exception; }; struct UserError { int code = ErrorCodes::OK; std::string message; + std::exception_ptr exception; }; bool canTry() @@ -232,11 +258,11 @@ private: void throwIfError() const { - if (user_error.code != ErrorCodes::OK) - throw Exception::createDeprecated(user_error.message, user_error.code); + if (user_error.exception) + std::rethrow_exception(user_error.exception); - if (keeper_error.code != KeeperError::Code::ZOK) - throw zkutil::KeeperException(keeper_error.message, keeper_error.code); + if (keeper_error.exception) + std::rethrow_exception(keeper_error.exception); } void logLastError(std::string_view header) diff --git a/src/Storages/MergeTree/tests/gtest_async_inserts.cpp b/src/Storages/MergeTree/tests/gtest_async_inserts.cpp index f67c2f7fb0f..2d8cd0acc3e 100644 --- a/src/Storages/MergeTree/tests/gtest_async_inserts.cpp +++ b/src/Storages/MergeTree/tests/gtest_async_inserts.cpp @@ -8,7 +8,7 @@ namespace DB { -std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num); +std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num); class AsyncInsertsTest : public ::testing::TestPartResult {}; @@ -16,31 +16,36 @@ class AsyncInsertsTest : public ::testing::TestPartResult TEST(AsyncInsertsTest, testScatterOffsetsBySelector) { - auto test_impl = [](std::vector offsets, std::vector selector_data, size_t part_num, std::vector> expected) + auto test_impl = [](std::vector offsets, std::vector selector_data, std::vector tokens, size_t part_num, std::vector>> expected) { - auto offset_ptr = std::make_shared(offsets); + auto offset_ptr = std::make_shared(offsets, tokens); IColumn::Selector selector(selector_data.size()); size_t num_rows = selector_data.size(); for (size_t i = 0; i < num_rows; i++) selector[i] = selector_data[i]; - auto results = scatterOffsetsBySelector(offset_ptr, selector, part_num); + auto results = scatterAsyncInsertInfoBySelector(offset_ptr, selector, part_num); ASSERT_EQ(results.size(), expected.size()); for (size_t i = 0; i < results.size(); i++) { - auto result = results[i]->offsets; + auto result = results[i]; auto expect = expected[i]; - ASSERT_EQ(result.size(), expect.size()); - for (size_t j = 0; j < result.size(); j++) - ASSERT_EQ(result[j], expect[j]); + ASSERT_EQ(result->offsets.size(), expect.size()); + ASSERT_EQ(result->tokens.size(), expect.size()); + for (size_t j = 0; j < expect.size(); j++) + { + ASSERT_EQ(result->offsets[j], std::get<0>(expect[j])); + ASSERT_EQ(result->tokens[j], std::get<1>(expect[j])); + } } }; - test_impl({5}, {0,1,0,1,0}, 2, {{3},{2}}); - test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, 2, {{3,5},{2,5}}); - test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, 3, {{2,4},{2,4},{2,4}}); - test_impl({1,2,3,4,5}, {0,1,2,3,4}, 5, {{1},{1},{1},{1},{1}}); - test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, 3, {{4},{3},{3}}); + test_impl({1}, {0}, {"a"}, 1, {{{1,"a"}}}); + test_impl({5}, {0,1,0,1,0}, {"a"}, 2, {{{3,"a"}},{{2,"a"}}}); + test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, {"a", "b"}, 2, {{{3,"a"},{5,"b"}},{{2,"a"},{5,"b"}}}); + test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, {"a", "b", "c"}, 3, {{{2, "a"},{4, "b"}},{{2,"a"},{4,"c"}},{{2,"b"},{4,"c"}}}); + test_impl({1,2,3,4,5}, {0,1,2,3,4}, {"a", "b", "c", "d", "e"}, 5, {{{1,"a"}},{{1,"b"}},{{1, "c"}},{{1, "d"}},{{1, "e"}}}); + test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, {"a", "b", "c"}, 3, {{{4, "c"}},{{3, "a"}},{{3, "b"}}}); } std::vector testSelfDeduplicate(std::vector data, std::vector offsets, std::vector hashes); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index d048c94ac75..d01746ddf1b 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -556,8 +556,9 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl void MaterializedPostgreSQLConsumer::syncTables() { - for (const auto & table_name : tables_to_sync) + while (!tables_to_sync.empty()) { + auto table_name = *tables_to_sync.begin(); auto & storage_data = storages.find(table_name)->second; Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns)); storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns(); @@ -589,8 +590,12 @@ void MaterializedPostgreSQLConsumer::syncTables() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + /// Retry this buffer later. + storage_data.buffer.columns = result_rows.mutateColumns(); + throw; } + + tables_to_sync.erase(tables_to_sync.begin()); } LOG_DEBUG(log, "Table sync end for {} tables, last lsn: {} = {}, (attempted lsn {})", tables_to_sync.size(), current_lsn, getLSNValue(current_lsn), getLSNValue(final_lsn)); @@ -742,8 +747,12 @@ void MaterializedPostgreSQLConsumer::setSetting(const SettingChange & setting) /// Read binary changes from replication slot via COPY command (starting from current lsn in a slot). bool MaterializedPostgreSQLConsumer::consume() { - bool slot_empty = true; + if (!tables_to_sync.empty()) + { + syncTables(); + } + bool slot_empty = true; try { auto tx = std::make_shared(connection->getRef()); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h index e8d42ef3668..d3d2faba497 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h @@ -21,6 +21,9 @@ namespace DB M(Bool, materialized_postgresql_tables_list_with_schema, false, \ "Consider by default that if there is a dot in tables list 'name.name', " \ "then the first name is postgres schema and second is postgres table. This setting is needed to allow table names with dots", 0) \ + M(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \ + M(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \ + M(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 998db4ea79e..f57a6a26a62 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -22,8 +22,6 @@ namespace DB { -static const auto RESCHEDULE_MS = 1000; -static const auto BACKOFF_TRESHOLD_MS = 10000; static const auto CLEANUP_RESCHEDULE_MS = 600000 * 3; /// 30 min namespace ErrorCodes @@ -80,7 +78,10 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( , schema_list(replication_settings.materialized_postgresql_schema_list) , schema_as_a_part_of_table_name(!schema_list.empty() || replication_settings.materialized_postgresql_tables_list_with_schema) , user_provided_snapshot(replication_settings.materialized_postgresql_snapshot) - , milliseconds_to_wait(RESCHEDULE_MS) + , reschedule_backoff_min_ms(replication_settings.materialized_postgresql_backoff_min_ms) + , reschedule_backoff_max_ms(replication_settings.materialized_postgresql_backoff_max_ms) + , reschedule_backoff_factor(replication_settings.materialized_postgresql_backoff_factor) + , milliseconds_to_wait(reschedule_backoff_min_ms) { if (!schema_list.empty() && !tables_list.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot have schema list and tables list at the same time"); @@ -166,7 +167,7 @@ void PostgreSQLReplicationHandler::checkConnectionAndStart() throw; LOG_ERROR(log, "Unable to set up connection. Reconnection attempt will continue. Error message: {}", pqxx_error.what()); - startup_task->scheduleAfter(RESCHEDULE_MS); + startup_task->scheduleAfter(milliseconds_to_wait); } catch (...) { @@ -435,18 +436,18 @@ void PostgreSQLReplicationHandler::consumerFunc() if (schedule_now) { - milliseconds_to_wait = RESCHEDULE_MS; + milliseconds_to_wait = reschedule_backoff_min_ms; consumer_task->schedule(); LOG_DEBUG(log, "Scheduling replication thread: now"); } else { - consumer_task->scheduleAfter(milliseconds_to_wait); - if (milliseconds_to_wait < BACKOFF_TRESHOLD_MS) - milliseconds_to_wait *= 2; + if (milliseconds_to_wait < reschedule_backoff_max_ms) + milliseconds_to_wait = std::min(milliseconds_to_wait * reschedule_backoff_factor, reschedule_backoff_max_ms); LOG_DEBUG(log, "Scheduling replication thread: after {} ms", milliseconds_to_wait); + consumer_task->scheduleAfter(milliseconds_to_wait); } } @@ -892,7 +893,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost catch (...) { consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, @@ -922,7 +923,7 @@ void PostgreSQLReplicationHandler::removeTableFromReplication(const String & pos catch (...) { consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 10a196cf31b..4c16ff95692 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -140,13 +140,16 @@ private: BackgroundSchedulePool::TaskHolder consumer_task; BackgroundSchedulePool::TaskHolder cleanup_task; + const UInt64 reschedule_backoff_min_ms; + const UInt64 reschedule_backoff_max_ms; + const UInt64 reschedule_backoff_factor; + UInt64 milliseconds_to_wait; + std::atomic stop_synchronization = false; /// MaterializedPostgreSQL tables. Used for managing all operations with its internal nested tables. MaterializedStorages materialized_storages; - UInt64 milliseconds_to_wait; - bool replication_handler_initialized = false; }; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 73fb279d51c..48825361a16 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -298,6 +298,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) SelectQueryOptions{ type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns : QueryProcessingStage::WithMergeableState} + .ignoreASTOptimizations() .ignoreSettingConstraints()) .buildQueryPipeline(); builder.resize(1); @@ -324,7 +325,7 @@ String ProjectionsDescription::toString() const for (const auto & projection : projections) list.children.push_back(projection.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } ProjectionsDescription ProjectionsDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr query_context) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c46192ab43b..0727658160c 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -906,15 +906,14 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers_=*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); for (size_t shard_index : collections::range(0, shards_info.size())) { @@ -968,15 +967,14 @@ std::optional StorageDistributed::distributedWriteFromClusterStor String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); /// Here we take addresses from destination cluster and assume source table exists on these nodes for (const auto & replicas : getCluster()->getShardsAddresses()) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5cb7575c3ae..cbd32460f7e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -93,6 +93,65 @@ namespace ErrorCodes namespace { +/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl() +void listFilesWithRegexpMatchingImpl( + const std::string & path_for_ls, + const std::string & for_match, + size_t & total_bytes_to_read, + std::vector & result, + bool recursive = false); + +/* + * When `{...}` has any `/`s, it must be processed in a different way: + * Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects multi-dir glob {.../..., .../...}, + * listFilesWithFoldedRegexpMatchingImpl is in charge from now on. + * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. + * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. + * StorageHDFS.cpp has the same logic. +*/ +void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls, + const std::string & processed_suffix, + const std::string & suffix_with_globs, + re2::RE2 & matcher, + size_t & total_bytes_to_read, + const size_t max_depth, + const size_t next_slash_after_glob_pos, + std::vector & result) +{ + if (!max_depth) + return; + + const fs::directory_iterator end; + for (fs::directory_iterator it(path_for_ls); it != end; ++it) + { + const std::string full_path = it->path().string(); + const size_t last_slash = full_path.rfind('/'); + const String dir_or_file_name = full_path.substr(last_slash); + + if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher)) + { + if (next_slash_after_glob_pos == std::string::npos) + { + total_bytes_to_read += it->file_size(); + result.push_back(it->path().string()); + } + else + { + listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" , + suffix_with_globs.substr(next_slash_after_glob_pos), + total_bytes_to_read, result); + } + } + else if (it->is_directory()) + { + listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name, + suffix_with_globs, matcher, total_bytes_to_read, + max_depth - 1, next_slash_after_glob_pos, result); + } + + } +} + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageHDFS. */ @@ -101,15 +160,42 @@ void listFilesWithRegexpMatchingImpl( const std::string & for_match, size_t & total_bytes_to_read, std::vector & result, - bool recursive = false) + bool recursive) { - const size_t first_glob = for_match.find_first_of("*?{"); + const size_t first_glob_pos = for_match.find_first_of("*?{"); + const bool has_glob = first_glob_pos != std::string::npos; - const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const size_t next_slash = suffix_with_globs.find('/', 1); - const std::string current_glob = suffix_with_globs.substr(0, next_slash); + /// slashes_in_glob counter is a upper-bound estimate of recursion depth + /// needed to process complex cases when `/` is included into glob, e.g. /pa{th1/a,th2/b}.csv + size_t slashes_in_glob = 0; + const size_t next_slash_after_glob_pos = [&]() + { + if (!has_glob) + return suffix_with_globs.find('/', 1); + + size_t in_curly = 0; + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) + { + if (*it == '{') + ++in_curly; + else if (*it == '/') + { + if (in_curly) + ++slashes_in_glob; + else + return size_t(std::distance(suffix_with_globs.begin(), it)); + } + else if (*it == '}') + --in_curly; + } + return std::string::npos; + }(); + + const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); + auto regexp = makeRegexpPatternFromGlobs(current_glob); re2::RE2 matcher(regexp); @@ -126,13 +212,22 @@ void listFilesWithRegexpMatchingImpl( if (!fs::exists(prefix_without_globs)) return; + const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; + + if (slashes_in_glob) + { + listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", suffix_with_globs, + matcher, total_bytes_to_read, slashes_in_glob, + next_slash_after_glob_pos, result); + return; + } + const fs::directory_iterator end; for (fs::directory_iterator it(prefix_without_globs); it != end; ++it) { const std::string full_path = it->path().string(); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash != std::string::npos; /// Condition is_directory means what kind of path is it in current iteration of ls if (!it->is_directory() && !looking_for_directory) @@ -148,14 +243,12 @@ void listFilesWithRegexpMatchingImpl( if (recursive) { listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob , + looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob , total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) - { /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result); - } + listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos), total_bytes_to_read, result); } } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 40d01215498..419dab16d1e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1037,7 +1037,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (replica: {}). Will remove recursively.", - Coordination::errorMessage(code), remote_replica_path); + code, remote_replica_path); /// And finally remove everything else recursively /// It may left some garbage if replica_path subtree is concurrently modified @@ -1145,7 +1145,7 @@ bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeper auto code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (table: {}). Will remove recursively.", - Coordination::errorMessage(code), zookeeper_path); + code, zookeeper_path); Strings children; code = zookeeper->tryGetChildren(zookeeper_path, children); @@ -1385,7 +1385,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) } const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0, - [&](UInt64 acc, const String& part_name) + [&](UInt64 acc, const String & part_name) { if (const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version)) return acc + part_info->getBlocksCount(); @@ -1896,7 +1896,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che else if (code == Coordination::Error::ZBADVERSION || code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "State was changed or isn't expected when trying to mark quorum for part {} as failed. Code: {}", - entry.new_part_name, Coordination::errorMessage(code)); + entry.new_part_name, code); } else throw Coordination::Exception(code); @@ -3104,7 +3104,7 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_is_lost.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, Coordination::errorMessage(get_is_lost.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, get_is_lost.error); continue; } else if (get_is_lost.data != "0") @@ -3115,12 +3115,12 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_log_pointer.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, Coordination::errorMessage(get_log_pointer.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, get_log_pointer.error); continue; } if (get_queue.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, Coordination::errorMessage(get_queue.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, get_queue.error); continue; } @@ -5074,15 +5074,14 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu String query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); query.IAST::format(ast_format_settings); query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); for (const auto & replicas : src_cluster->getShardsAddresses()) { @@ -7252,7 +7251,7 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( { for (size_t i = 0; i < delete_requests.size(); ++i) if (delete_responses[i]->error != Coordination::Error::ZOK) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), Coordination::errorMessage(delete_responses[i]->error)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), delete_responses[i]->error); } LOG_TRACE(log, "Deleted {} deduplication block IDs in partition ID {}", delete_requests.size(), partition_id); @@ -8768,7 +8767,7 @@ std::pair> getParentLockedBlobs(const ZooKeeperWith zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, files_not_to_remove_str, nullptr, nullptr, &code); if (code != Coordination::Error::ZOK) { - LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code)); + LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), code); return {true, std::nullopt}; } @@ -9178,8 +9177,7 @@ std::optional StorageReplicatedMergeTree::tryCreateZeroCopyExclusi String zc_zookeeper_path = *getZeroCopyPartPath(part_name, disk); /// Just recursively create ancestors for lock - zookeeper->createAncestors(zc_zookeeper_path); - zookeeper->createIfNotExists(zc_zookeeper_path, ""); + zookeeper->createAncestors(zc_zookeeper_path + "/"); /// Create actual lock ZeroCopyLock lock(zookeeper, zc_zookeeper_path, replica_name); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 17309e6da73..728d61298ec 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -974,6 +974,7 @@ StorageS3::StorageS3( FormatFactory::instance().checkFormatName(configuration.format); context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); + context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 153a3b7f11b..646fa59b33b 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -44,6 +44,8 @@ StorageS3Cluster::StorageS3Cluster( , s3_configuration{configuration_} { context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); + context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); + StorageInMemoryMetadata storage_metadata; updateConfigurationIfChanged(context_); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e6953afe68e..b70a7de7909 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1019,6 +1019,7 @@ StorageURL::StorageURL( distributed_processing_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context_->getHTTPHeaderFilter().checkHeaders(headers); } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index f652a40a561..8804afb7af2 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -48,6 +48,7 @@ StorageURLCluster::StorageURLCluster( , uri(uri_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context_->getHTTPHeaderFilter().checkHeaders(configuration_.headers); StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index a3d05281b28..2fcc91e49bb 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include @@ -69,20 +71,52 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return engine_full; } -void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQueryInfo & query_info, ContextPtr context) +{ + MutableColumnPtr name_column = ColumnString::create(); + MutableColumnPtr engine_column = ColumnString::create(); + MutableColumnPtr uuid_column = ColumnUUID::create(); + + for (const auto & [database_name, database] : databases) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + name_column->insert(database_name); + engine_column->insert(database->getEngineName()); + uuid_column->insert(database->getUUID()); + } + + Block block + { + ColumnWithTypeAndName(std::move(name_column), std::make_shared(), "name"), + ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine"), + ColumnWithTypeAndName(std::move(uuid_column), std::make_shared(), "uuid") + }; + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + return block.getByPosition(0).column; +} + +void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & [database_name, database] : databases) + ColumnPtr filtered_databases_column = getFilteredDatabases(databases, query_info, context); + + for (size_t i = 0; i < filtered_databases_column->size(); ++i) { + auto database_name = filtered_databases_column->getDataAt(i).toString(); + if (check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) continue; if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) continue; /// filter out the internal database for temporary tables in system.databases, asynchronous metric "NumberOfDatabases" behaves the same way + const auto & database = databases.at(database_name); + res_columns[0]->insert(database_name); res_columns[1]->insert(database->getEngineName()); res_columns[2]->insert(context->getPath() + database->getDataPath()); diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 26cf96dc9b3..ac38c9c97b1 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -120,7 +120,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"has_lightweight_delete", std::make_shared()}, - {"last_removal_attemp_time", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared()}, {"removal_state", std::make_shared()}, } ) @@ -346,7 +346,7 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(part->hasLightweightDelete()); if (columns_mask[src_index++]) - columns[res_index++]->insert(static_cast(part->last_removal_attemp_time.load(std::memory_order_relaxed))); + columns[res_index++]->insert(static_cast(part->last_removal_attempt_time.load(std::memory_order_relaxed))); if (columns_mask[src_index++]) columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 117fb4e8a5c..288e4fd52a0 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -44,7 +44,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr if (!key.is_shared && key.user_name != user_name) continue; - res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string + res_columns[0]->insert(key.query_string); /// approximates the original query string res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result)); res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now()); res_columns[3]->insert(key.is_shared); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 242e8e5d570..0f506040cd9 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -992,7 +992,7 @@ void StorageWindowView::cleanup() auto cleanup_context = Context::createCopy(getContext()); cleanup_context->makeQueryContext(); cleanup_context->setCurrentQueryId(""); - cleanup_context->getClientInfo().is_replicated_database_internal = true; + cleanup_context->setQueryKindReplicatedDatabaseInternal(); InterpreterAlterQuery interpreter_alter(alter_query, cleanup_context); interpreter_alter.execute(); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 548b55749d7..375510e62bf 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -334,9 +334,10 @@ String transformQueryForExternalDatabaseImpl( dropAliases(select_ptr); WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = identifier_quoting_style; - settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ identifier_quoting_style); select->format(settings); diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index f5aff4bd098..2a46f839bbe 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -72,7 +73,17 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont auto read_buf = std::make_unique(data); auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size); - auto pipeline = std::make_unique(input_format); + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns, *input_format, context); + }); + } + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto reader = std::make_unique(*pipeline); std::vector blocks; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c8cc0cddd30..0f3078b1ca6 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -18,6 +18,8 @@ #include #include #include "registerTableFunctions.h" +#include +#include #include @@ -32,6 +34,24 @@ namespace ErrorCodes } +std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + + return result; +} + /// This is needed to avoid copy-pase. Because s3Cluster arguments only differ in additional argument (first) - cluster name void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) { @@ -41,13 +61,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } else { - if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); if (header_it != args.end()) args.erase(header_it); + if (args.empty() || args.size() > 6) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); + for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index c983bec9bf4..d308f469236 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -73,6 +73,10 @@ protected: mutable StorageS3::Configuration configuration; ColumnsDescription structure_hint; + +private: + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; }; } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c11a19b36ea..ae6305705c2 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -25,6 +25,9 @@ endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() +if (TARGET ch_rust::prql) + set(USE_PRQL 1) +endif() if (TARGET OpenSSL::SSL) set(USE_SSL 1) endif() diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 46b9ade43de..02f70c8a6df 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -196,3 +196,8 @@ test_quota/test.py::test_tracking_quota test_quota/test.py::test_users_xml_is_readonly test_replicating_constants/test.py::test_different_versions test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node] +test_drop_is_lock_free/test.py::test_query_is_lock_free[detach table] +test_backward_compatibility/test_data_skipping_indices.py::test_index +test_backward_compatibility/test_convert_ordinary.py::test_convert_ordinary_to_atomic +test_backward_compatibility/test_memory_bound_aggregation.py::test_backward_compatability +test_odbc_interaction/test.py::test_postgres_insert diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index e0f259306aa..19b90a39800 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -130,3 +130,4 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long +02815_range_dict_no_direct_join diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 14844ed9b25..35b0614b01f 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -246,6 +246,12 @@ def main(): if args.check_running_workflows: workflows = get_workflows_for_head(repo, pr.head.sha) + logging.info( + "The PR #%s has following workflows:\n%s", + pr.number, + "\n".join(f"{wf.html_url}: status is {wf.status}" for wf in workflows), + ) + workflows_in_progress = [wf for wf in workflows if wf.status != "completed"] # At most one workflow in progress is fine. We check that there no # cases like, e.g. PullRequestCI and DocksCheck in progress at once diff --git a/tests/config/config.d/forbidden_headers.xml b/tests/config/config.d/forbidden_headers.xml new file mode 100644 index 00000000000..0d48f650fe6 --- /dev/null +++ b/tests/config/config.d/forbidden_headers.xml @@ -0,0 +1,6 @@ + + +
exact_header
+ (?i)(case_insensitive_header) +
+
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index af04024d528..8533fef9fc9 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -1,64 +1,14 @@ - - + s3 - s3_common_disk/ + s3_disk/ http://localhost:11111/test/common/ clickhouse clickhouse 20000 - - - s3 - s3_disk/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - s3 - s3_disk_2/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_3/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_4/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_5/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_6/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - cache s3_disk @@ -67,65 +17,6 @@ 1 100 - - cache - s3_disk_2 - s3_cache_2/ - 128Mi - 100Mi - 100 - - - cache - s3_disk_3 - s3_disk_3_cache/ - 128Mi - 22548578304 - 1 - 1 - 100 - - - cache - s3_disk_4 - s3_cache_4/ - 128Mi - 1 - 1 - 100 - - - cache - s3_disk_5 - s3_cache_5/ - 128Mi - 100 - - - cache - s3_disk_6 - s3_cache_6/ - 128Mi - 1 - 100 - 100 - - - cache - s3_disk_6 - s3_cache_small/ - 1000 - 100 - - - cache - s3_disk_6 - s3_cache_small_segment_size/ - 128Mi - 10Ki - 1 - 100 - local_blob_storage @@ -167,7 +58,7 @@ cache - s3_cache_5 + s3_cache s3_cache_multi/ 22548578304 100 @@ -188,34 +79,6 @@ - - -
- s3_cache_2 -
-
-
- - -
- s3_cache_3 -
-
-
- - -
- s3_cache_4 -
-
-
- - -
- s3_cache_6 -
-
-
@@ -223,13 +86,6 @@
- - -
- s3_cache_small -
-
-
@@ -251,13 +107,6 @@
- - -
- s3_cache_small_segment_size -
-
-
diff --git a/tests/config/install.sh b/tests/config/install.sh index 50f2627d37c..d75a652f084 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -51,6 +51,7 @@ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/system_unfreeze.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 31f4a7666c8..a6e9716dc20 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -7,9 +7,11 @@ import json import logging import os import random +import re import shutil import subprocess import time +import shlex import zlib # for crc32 @@ -110,16 +112,36 @@ def get_counters(fname): if not (".py::" in line and " " in line): continue - line_arr = line.strip().split(" ") + line = line.strip() + # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client + # ^^^^^^^^^^^^^ + if line.strip().startswith("["): + line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) + + line_arr = line.split(" ") if len(line_arr) < 2: logging.debug("Strange line %s", line) continue # Lines like: - # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client - # [gw3] [ 40%] PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA] - state = line_arr[-2] - test_name = line_arr[-1] + # + # ERROR test_mysql_protocol/test.py::test_golang_client + # PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA] + # PASSED test_drop_is_lock_free/test.py::test_query_is_lock_free[detach part] + # + state = line_arr.pop(0) + test_name = " ".join(line_arr) + + # Normalize test names for lines like this: + # + # FAILED test_storage_s3/test.py::test_url_reconnect_in_the_middle - Exception + # FAILED test_distributed_ddl/test.py::test_default_database[configs] - AssertionError: assert ... + # + test_name = re.sub( + r"^(?P[^\[\] ]+)(?P\[[^\[\]]*\]|)(?P - .*|)$", + r"\g\g", + test_name, + ) if state in counters: counters[state].add(test_name) @@ -411,7 +433,7 @@ class ClickhouseIntegrationTestsRunner: out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log") cmd = ( "cd {repo_path}/tests/integration && " - "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " + "timeout -s 9 1h ./runner {runner_opts} {image_cmd} -- --setup-plan " "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' " "| grep -v 'SKIPPED' | sort -u > {out_file}".format( repo_path=repo_path, @@ -646,7 +668,7 @@ class ClickhouseIntegrationTestsRunner: info_basename = test_group_str + "_" + str(i) + ".nfo" info_path = os.path.join(repo_path, "tests/integration", info_basename) - test_cmd = " ".join([test for test in sorted(test_names)]) + test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)]) parallel_cmd = ( " --parallel {} ".format(num_workers) if num_workers > 0 else "" ) @@ -655,7 +677,7 @@ class ClickhouseIntegrationTestsRunner: # -E -- (E)rror # -p -- (p)assed # -s -- (s)kipped - cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} '-rfEps --run-id={} --color=no --durations=0 {}' | tee {}".format( + cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format( repo_path, self._get_runner_opts(), image_cmd, @@ -766,6 +788,7 @@ class ClickhouseIntegrationTestsRunner: and test not in counters["ERROR"] and test not in counters["SKIPPED"] and test not in counters["FAILED"] + and test not in counters["BROKEN"] and "::" in test ): counters["ERROR"].append(test) @@ -999,16 +1022,6 @@ class ClickhouseIntegrationTestsRunner: if "(memory)" in self.params["context_name"]: result_state = "success" - for res in test_result: - # It's not easy to parse output of pytest - # Especially when test names may contain spaces - # Do not allow it to avoid obscure failures - if " " not in res[0]: - continue - logging.warning("Found invalid test name with space: %s", res[0]) - status_text = "Found test with invalid name, see main log" - result_state = "failure" - return result_state, status_text, test_result, [] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5933883f7b0..a4e25e653b3 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -45,7 +45,7 @@ def cleanup_environment(): logging.debug("Pruning Docker networks") run_and_check( - ["docker network prune"], + ["docker network prune --force"], shell=True, nothrow=True, ) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0614cbf0e0d..eff44de842a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -486,6 +486,8 @@ class ClickHouseCluster: self.kafka_docker_id = None self.schema_registry_host = "schema-registry" self._schema_registry_port = 0 + self.schema_registry_auth_host = "schema-registry-auth" + self._schema_registry_auth_port = 0 self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) self.coredns_host = "coredns" @@ -657,6 +659,13 @@ class ClickHouseCluster: self._schema_registry_port = get_free_port() return self._schema_registry_port + @property + def schema_registry_auth_port(self): + if self._schema_registry_auth_port: + return self._schema_registry_auth_port + self._schema_registry_auth_port = get_free_port() + return self._schema_registry_auth_port + @property def kerberized_kafka_port(self): if self._kerberized_kafka_port: @@ -1163,8 +1172,11 @@ class ClickHouseCluster: self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) + env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) - env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081" + env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str( + self.schema_registry_auth_port + ) self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")] ) @@ -1498,6 +1510,7 @@ class ClickHouseCluster: with_kafka=False, with_kerberized_kafka=False, with_kerberos_kdc=False, + with_secrets=False, with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1520,6 +1533,7 @@ class ClickHouseCluster: with_jdbc_bridge=False, with_hive=False, with_coredns=False, + allow_analyzer=True, hostname=None, env_variables=None, image="clickhouse/integration-test", @@ -1604,6 +1618,10 @@ class ClickHouseCluster: with_nats=with_nats, with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, + with_secrets=with_secrets + or with_kerberized_hdfs + or with_kerberos_kdc + or with_kerberized_kafka, with_mongo=with_mongo or with_mongo_secure, with_meili=with_meili, with_redis=with_redis, @@ -1613,6 +1631,7 @@ class ClickHouseCluster: with_hive=with_hive, with_coredns=with_coredns, with_cassandra=with_cassandra, + allow_analyzer=allow_analyzer, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -2493,20 +2512,27 @@ class ClickHouseCluster: raise Exception("Can't wait Azurite to start") def wait_schema_registry_to_start(self, timeout=180): - sr_client = CachedSchemaRegistryClient( - {"url": "http://localhost:{}".format(self.schema_registry_port)} - ) - start = time.time() - while time.time() - start < timeout: - try: - sr_client._send_request(sr_client.url) - logging.debug("Connected to SchemaRegistry") - return sr_client - except Exception as ex: - logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) - time.sleep(1) + for port in self.schema_registry_port, self.schema_registry_auth_port: + reg_url = "http://localhost:{}".format(port) + arg = {"url": reg_url} + sr_client = CachedSchemaRegistryClient(arg) - raise Exception("Can't wait Schema Registry to start") + start = time.time() + sr_started = False + sr_auth_started = False + while time.time() - start < timeout: + try: + sr_client._send_request(sr_client.url) + logging.debug("Connected to SchemaRegistry") + # don't care about possible auth errors + sr_started = True + break + except Exception as ex: + logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) + time.sleep(1) + + if not sr_started: + raise Exception("Can't wait Schema Registry to start") def wait_cassandra_to_start(self, timeout=180): self.cassandra_ip = self.get_instance_ip(self.cassandra_host) @@ -3135,6 +3161,7 @@ class ClickHouseInstance: with_nats, with_nginx, with_kerberized_hdfs, + with_secrets, with_mongo, with_meili, with_redis, @@ -3144,6 +3171,7 @@ class ClickHouseInstance: with_hive, with_coredns, with_cassandra, + allow_analyzer, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, @@ -3197,7 +3225,7 @@ class ClickHouseInstance: if clickhouse_path_dir else None ) - self.kerberos_secrets_dir = p.abspath(p.join(base_path, "secrets")) + self.secrets_dir = p.abspath(p.join(base_path, "secrets")) self.macros = macros if macros is not None else {} self.with_zookeeper = with_zookeeper self.zookeeper_config_path = zookeeper_config_path @@ -3220,6 +3248,7 @@ class ClickHouseInstance: self.with_nats = with_nats self.with_nginx = with_nginx self.with_kerberized_hdfs = with_kerberized_hdfs + self.with_secrets = with_secrets self.with_mongo = with_mongo self.with_meili = with_meili self.with_redis = with_redis @@ -3230,6 +3259,7 @@ class ClickHouseInstance: self.with_hive = with_hive self.with_coredns = with_coredns self.coredns_config_dir = p.abspath(p.join(base_path, "coredns_config")) + self.allow_analyzer = allow_analyzer self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -3522,6 +3552,24 @@ class ClickHouseInstance: return error + def append_hosts(self, name, ip): + self.exec_in_container( + (["bash", "-c", "echo '{}' {} >> /etc/hosts".format(ip, name)]), + privileged=True, + user="root", + ) + + def set_hosts(self, hosts): + entries = ["127.0.0.1 localhost", "::1 localhost"] + for host in hosts: + entries.append(f"{host[0]} {host[1]}") + + self.exec_in_container( + ["bash", "-c", 'echo -e "{}" > /etc/hosts'.format("\\n".join(entries))], + privileged=True, + user="root", + ) + # Connects to the instance via HTTP interface, sends a query and returns both the answer and the error message # as a tuple (output, error). def http_query_and_get_answer_with_error( @@ -4201,7 +4249,10 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None: + if ( + os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None + and self.allow_analyzer + ): write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): @@ -4217,17 +4268,16 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if ( - self.with_kerberized_kafka - or self.with_kerberized_hdfs - or self.with_kerberos_kdc - ): + if self.with_secrets: if self.with_kerberos_kdc: base_secrets_dir = self.cluster.instances_dir else: base_secrets_dir = self.path + from_dir = self.secrets_dir + to_dir = p.abspath(p.join(base_secrets_dir, "secrets")) + logging.debug(f"Copy secret from {from_dir} to {to_dir}") shutil.copytree( - self.kerberos_secrets_dir, + self.secrets_dir, p.abspath(p.join(base_secrets_dir, "secrets")), dirs_exist_ok=True, ) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 3b909194b63..93ea3fa74b7 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -1,5 +1,6 @@ import socket import time +from kazoo.client import KazooClient def get_keeper_socket(cluster, node, port=9181): @@ -26,9 +27,17 @@ def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" -def wait_until_connected(cluster, node, port=9181): +def wait_until_connected(cluster, node, port=9181, timeout=30.0): + elapsed = 0.0 + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: time.sleep(0.1) + elapsed += 0.1 + + if elapsed >= timeout: + raise Exception( + f"{timeout}s timeout while waiting for {node.name} to start serving requests" + ) def wait_until_quorum_lost(cluster, node, port=9181): @@ -51,3 +60,36 @@ def get_leader(cluster, nodes): if is_leader(cluster, node): return node raise Exception("No leader in Keeper cluster.") + + +def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: + _fake = KazooClient( + hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout + ) + _fake.start() + return _fake + + +def get_config_str(zk: KazooClient) -> str: + """ + Return decoded contents of /keeper/config node + """ + return zk.get("/keeper/config")[0].decode("utf-8") + + +def wait_configs_equal(left_config: str, right_zk: KazooClient, timeout: float = 30.0): + """ + Check whether get /keeper/config result in left_config is equal + to get /keeper/config on right_zk ZK connection. + """ + elapsed: float = 0.0 + while sorted(left_config.split("\n")) != sorted( + get_config_str(right_zk).split("\n") + ): + time.sleep(1) + elapsed += 1 + if elapsed >= timeout: + raise Exception( + f"timeout while checking nodes configs to get equal. " + f"Left: {left_config}, right: {get_config_str(right_zk)}" + ) diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index dfae37af434..3c8a23b15a2 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -76,16 +76,26 @@ def drop_postgres_schema(cursor, schema_name): def create_postgres_table( - cursor, table_name, replica_identity_full=False, template=postgres_table_template + cursor, + table_name, + database_name="", + replica_identity_full=False, + template=postgres_table_template, ): - drop_postgres_table(cursor, table_name) - cursor.execute(template.format(table_name)) + if database_name == "": + name = table_name + else: + name = f"{database_name}.{table_name}" + drop_postgres_table(cursor, name) + query = template.format(name) + cursor.execute(query) + print(f"Query: {query}") if replica_identity_full: - cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;") + cursor.execute(f"ALTER TABLE {name} REPLICA IDENTITY FULL;") -def drop_postgres_table(cursor, table_name): - cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """) +def drop_postgres_table(cursor, name): + cursor.execute(f"""DROP TABLE IF EXISTS "{name}" """) def create_postgres_table_with_schema(cursor, schema_name, table_name): @@ -103,13 +113,16 @@ class PostgresManager: self.created_materialized_postgres_db_list = set() self.created_ch_postgres_db_list = set() - def init(self, instance, ip, port): + def init(self, instance, ip, port, default_database="postgres_database"): self.instance = instance self.ip = ip self.port = port - self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.default_database = default_database self.prepare() + def get_default_database(self): + return self.default_database + def restart(self): try: self.clear() @@ -118,11 +131,22 @@ class PostgresManager: self.prepare() raise ex + def execute(self, query): + self.cursor.execute(query) + def prepare(self): - conn = get_postgres_conn(ip=self.ip, port=self.port) - cursor = conn.cursor() - self.create_postgres_db(cursor, "postgres_database") - self.create_clickhouse_postgres_db(ip=self.ip, port=self.port) + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() + if self.default_database != "": + self.create_postgres_db(self.default_database) + self.conn = get_postgres_conn( + ip=self.ip, + port=self.port, + database=True, + database_name=self.default_database, + ) + self.cursor = self.conn.cursor() + self.create_clickhouse_postgres_db() def clear(self): if self.conn.closed == 0: @@ -132,63 +156,79 @@ class PostgresManager: for db in self.created_ch_postgres_db_list.copy(): self.drop_clickhouse_postgres_db(db) if len(self.created_postgres_db_list) > 0: - conn = get_postgres_conn(ip=self.ip, port=self.port) - cursor = conn.cursor() + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() for db in self.created_postgres_db_list.copy(): - self.drop_postgres_db(cursor, db) + self.drop_postgres_db(db) - def get_db_cursor(self): - self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + def get_db_cursor(self, database_name=""): + if database_name == "": + database_name = self.default_database + self.conn = get_postgres_conn( + ip=self.ip, port=self.port, database=True, database_name=database_name + ) return self.conn.cursor() - def create_postgres_db(self, cursor, name="postgres_database"): - self.drop_postgres_db(cursor, name) - self.created_postgres_db_list.add(name) - cursor.execute(f"CREATE DATABASE {name}") + def database_or_default(self, database_name): + if database_name != "": + return database_name + if self.default_database != "": + return self.default_database + raise Exception("Database name is empty") - def drop_postgres_db(self, cursor, name="postgres_database"): - cursor.execute(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_postgres_db_list: - self.created_postgres_db_list.remove(name) + def create_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.drop_postgres_db(database_name) + self.created_postgres_db_list.add(database_name) + self.cursor.execute(f"CREATE DATABASE {database_name}") + + def drop_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.cursor.execute(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_postgres_db_list: + self.created_postgres_db_list.remove(database_name) def create_clickhouse_postgres_db( self, - ip, - port, - name="postgres_database", - database_name="postgres_database", + database_name="", schema_name="", + postgres_database="", ): - self.drop_clickhouse_postgres_db(name) - self.created_ch_postgres_db_list.add(name) + database_name = self.database_or_default(database_name) + if postgres_database == "": + postgres_database = database_name + self.drop_clickhouse_postgres_db(database_name) + self.created_ch_postgres_db_list.add(database_name) if len(schema_name) == 0: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')""" + CREATE DATABASE {database_name} + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{postgres_database}', 'postgres', 'mysecretpassword')""" ) else: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')""" + CREATE DATABASE {database_name} + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{postgres_database}', 'postgres', 'mysecretpassword', '{schema_name}')""" ) - def drop_clickhouse_postgres_db(self, name="postgres_database"): - self.instance.query(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_ch_postgres_db_list: - self.created_ch_postgres_db_list.remove(name) + def drop_clickhouse_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.instance.query(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_ch_postgres_db_list: + self.created_ch_postgres_db_list.remove(database_name) def create_materialized_db( self, ip, port, materialized_database="test_database", - postgres_database="postgres_database", + postgres_database="", settings=[], table_overrides="", ): + postgres_database = self.database_or_default(postgres_database) self.created_materialized_postgres_db_list.add(materialized_database) self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") @@ -207,35 +247,32 @@ class PostgresManager: self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC") if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) - assert materialized_database not in self.instance.query("SHOW DATABASES") - def create_and_fill_postgres_table(self, table_name): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_table_from_cursor(cursor, table_name) + def create_postgres_schema(self, name): + create_postgres_schema(self.cursor, name) - def create_and_fill_postgres_table_from_cursor(self, cursor, table_name): - create_postgres_table(cursor, table_name) - self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)" - ) - - def create_and_fill_postgres_tables(self, tables_num, numbers=50): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_tables_from_cursor( - cursor, tables_num, numbers=numbers - ) - - def create_and_fill_postgres_tables_from_cursor( - self, cursor, tables_num, numbers=50 + def create_postgres_table( + self, table_name, database_name="", template=postgres_table_template ): + create_postgres_table( + self.cursor, table_name, database_name=database_name, template=template + ) + + def create_and_fill_postgres_table(self, table_name, database_name=""): + create_postgres_table(self.cursor, table_name, database_name) + database_name = self.database_or_default(database_name) + self.instance.query( + f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)" + ) + + def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name=""): for i in range(tables_num): table_name = f"postgresql_replica_{i}" - create_postgres_table(cursor, table_name) + create_postgres_table(self.cursor, table_name, database_name) if numbers > 0: + db = self.database_or_default(database_name) self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})" + f"INSERT INTO {db}.{table_name} SELECT number, number from numbers({numbers})" ) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index d060218456a..407fe7d1b01 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -68,5 +68,9 @@ "test_server_reload/test.py::test_remove_postgresql_port", "test_server_reload/test.py::test_remove_tcp_port", - "test_keeper_map/test.py::test_keeper_map_without_zk" + "test_keeper_map/test.py::test_keeper_map_without_zk", + + "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", + "test_http_failover/test.py::test_url_invalid_hostname", + "test_http_failover/test.py::test_url_ip_change" ] diff --git a/tests/integration/runner b/tests/integration/runner index c124ad46447..1b902803741 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -11,6 +11,7 @@ import subprocess import sys import string import random +import shlex def random_str(length=6): @@ -135,9 +136,7 @@ def check_args_and_update_paths(args): def docker_kill_handler_handler(signum, frame): subprocess.check_call( - 'docker ps --all --quiet --filter name={name} --format="{{{{.ID}}}}"'.format( - name=CONTAINER_NAME - ), + "docker ps --all --quiet --filter name={name}".format(name=CONTAINER_NAME), shell=True, ) raise KeyboardInterrupt("Killed by Ctrl+C") @@ -407,8 +406,14 @@ if __name__ == "__main__": if args.analyzer: use_analyzer = "-e CLICKHOUSE_USE_NEW_ANALYZER=1" - pytest_opts = " ".join(args.pytest_args).replace("'", "\\'") - tests_list = " ".join(args.tests_list) + # NOTE: since pytest options is in the argument value already we need to additionally escape '"' + pytest_opts = " ".join( + map(lambda x: shlex.quote(x).replace('"', '\\"'), args.pytest_args) + ) + tests_list = " ".join( + map(lambda x: shlex.quote(x).replace('"', '\\"'), args.tests_list) + ) + cmd_base = ( f"docker run {net} {tty} --rm --name {CONTAINER_NAME} " "--privileged --dns-search='.' " # since recent dns search leaks from host @@ -420,7 +425,7 @@ if __name__ == "__main__": f"--volume={args.src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos " f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} " f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_analyzer} -e PYTHONUNBUFFERED=1 " - f"-e PYTEST_OPTS='{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv'" + f'-e PYTEST_ADDOPTS="{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv"' f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}" ) @@ -431,7 +436,7 @@ if __name__ == "__main__": ) containers = subprocess.check_output( - f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", + f"docker ps --all --quiet --filter name={CONTAINER_NAME}", shell=True, universal_newlines=True, ).splitlines() diff --git a/tests/integration/test_async_connect_to_multiple_ips/__init__.py b/tests/integration/test_async_connect_to_multiple_ips/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml new file mode 100644 index 00000000000..399d886ee6a --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml new file mode 100644 index 00000000000..df0247fd651 --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml @@ -0,0 +1,4 @@ + + :: + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/test.py b/tests/integration/test_async_connect_to_multiple_ips/test.py new file mode 100644 index 00000000000..acc4d24d0fa --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/test.py @@ -0,0 +1,72 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def cluster_without_dns_cache_update(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + pass + + +node1 = cluster.add_instance( + "node1", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.11", +) + +node2 = cluster.add_instance( + "node2", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.12", +) + + +# node1 - source with table, have invalid ipv6 +# node2 - destination, doing remote query +def test(cluster_without_dns_cache_update): + node1.query( + "CREATE TABLE test(t Date, label UInt8) ENGINE = MergeTree PARTITION BY t ORDER BY label;" + ) + node1.query("INSERT INTO test SELECT toDate('2022-12-28'), 1;") + assert node1.query("SELECT count(*) FROM test") == "1\n" + + wrong_ip = "2001:3984:3989::1:1118" + + node2.exec_in_container( + (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(wrong_ip, node1.name)]) + ) + node2.exec_in_container( + ( + [ + "bash", + "-c", + "echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name), + ] + ) + ) + + assert node1.query("SELECT count(*) from test") == "1\n" + node2.query("SYSTEM DROP DNS CACHE") + node1.query("SYSTEM DROP DNS CACHE") + assert ( + node2.query( + f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;" + ) + == "1\n" + ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 6af3a7dbab8..39496b8a5c8 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -580,6 +580,7 @@ def test_required_privileges(): node1.query( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2") assert node2.query("SELECT * FROM tbl2") == "100\n" @@ -593,6 +594,7 @@ def test_required_privileges(): node1.query("GRANT INSERT, CREATE TABLE ON tbl TO u1") node1.query(f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1") + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") assert node2.query("SELECT * FROM tbl") == "100\n" diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index ea1d3ab9c07..6f21b184a95 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -10,11 +10,13 @@ node1 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py index 01c9736c354..cf258987cbf 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py @@ -9,9 +9,10 @@ node1 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.3", with_installed_binary=True, + allow_analyzer=False, ) -node2 = cluster.add_instance("node2", with_zookeeper=True) -node3 = cluster.add_instance("node3", with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True, allow_analyzer=False) +node3 = cluster.add_instance("node3", with_zookeeper=True, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py index 1f6d405603a..3a936239cc8 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,9 +19,10 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) -node4 = cluster.add_instance("node4", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) +node4 = cluster.add_instance("node4", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index 8b1afd358eb..36facdd59b1 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -9,6 +9,7 @@ node = cluster.add_instance( stay_alive=True, with_zookeeper=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py index 7ea0d2d9f21..c68468aad75 100644 --- a/tests/integration/test_backward_compatibility/test_cte_distributed.py +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.3.14", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) @@ -31,7 +32,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range @@ -49,7 +50,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range diff --git a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py index c65dc6d3841..46ab27d2ab0 100644 --- a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py +++ b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="21.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index afb19901e74..fa24b146fec 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -9,7 +9,7 @@ from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) -upstream = cluster.add_instance("upstream") +upstream = cluster.add_instance("upstream", allow_analyzer=False) backward = cluster.add_instance( "backward", image="clickhouse/clickhouse-server", @@ -19,6 +19,7 @@ backward = cluster.add_instance( # Affected at least: singleValueOrNull, last_value, min, max, any, anyLast, anyHeavy, first_value, argMin, argMax tag="22.6", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py index d55f155918e..cd67f1f6344 100644 --- a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py +++ b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="23.4", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_insert_profile_events.py b/tests/integration/test_backward_compatibility/test_insert_profile_events.py index 0fd453e57d4..8564c6b5952 100644 --- a/tests/integration/test_backward_compatibility/test_insert_profile_events.py +++ b/tests/integration/test_backward_compatibility/test_insert_profile_events.py @@ -7,12 +7,13 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -upstream_node = cluster.add_instance("upstream_node") +upstream_node = cluster.add_instance("upstream_node", allow_analyzer=False) old_node = cluster.add_instance( "old_node", image="clickhouse/clickhouse-server", tag="22.5.1.2079", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py index bb40dff27ac..04016755a24 100644 --- a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py +++ b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py @@ -10,6 +10,7 @@ node_22_6 = cluster.add_instance( tag="22.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py index d76c4eba409..96b41c81384 100644 --- a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py index fcdedd29dad..3cd708d5029 100644 --- a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py +++ b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py index 8bdae54a889..7e10b6ab430 100644 --- a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py index 17a7282b7b5..e4fda618031 100644 --- a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 3d006caad0d..82ffcc20b60 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -11,12 +11,14 @@ node_old = cluster.add_instance( stay_alive=True, with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", main_configs=["configs/no_compress_marks.xml"], with_zookeeper=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 98417930713..9c1c9797383 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -55,6 +55,13 @@ def cluster_without_dns_cache_update(): # node1 is a source, node2 downloads data # node2 has long dns_cache_update_period, so dns cache update wouldn't work def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): + # In this case we should manually set up the static DNS entries on the source host + # to exclude resplving addresses automatically added by docker. + # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. + node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) + # drop DNS cache + node2.query("SYSTEM DROP DNS CACHE") + # First we check, that normal replication works node1.query( "INSERT INTO test_table_drop VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)" @@ -64,6 +71,7 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # We change source node ip cluster.restart_instance_with_ip_change(node1, "2001:3984:3989::1:7777") + node2.set_hosts([("2001:3984:3989::1:7777", "node1")]) # Put some data to source node1 node1.query( @@ -163,17 +171,8 @@ def test_ip_change_update_dns_cache(cluster_with_dns_cache_update): assert_eq_with_retry(node4, "SELECT count(*) from test_table_update", "7") -def set_hosts(node, hosts): - new_content = "\\n".join(["127.0.0.1 localhost", "::1 localhost"] + hosts) - node.exec_in_container( - ["bash", "-c", 'echo -e "{}" > /etc/hosts'.format(new_content)], - privileged=True, - user="root", - ) - - def test_dns_cache_update(cluster_with_dns_cache_update): - set_hosts(node4, ["127.255.255.255 lost_host"]) + node4.set_hosts([("127.255.255.255", "lost_host")]) with pytest.raises(QueryRuntimeException): node4.query("SELECT * FROM remote('lost_host', 'system', 'one')") @@ -184,7 +183,7 @@ def test_dns_cache_update(cluster_with_dns_cache_update): with pytest.raises(QueryRuntimeException): node4.query("SELECT * FROM distributed_lost_host") - set_hosts(node4, ["127.0.0.1 lost_host"]) + node4.set_hosts([("127.0.0.1", "lost_host")]) # Wait a bit until dns cache will be updated assert_eq_with_retry( @@ -239,11 +238,10 @@ def test_user_access_ip_change(cluster_with_dns_cache_update, node): == "0\n" ) - set_hosts( - node, + node.set_hosts( [ - "127.255.255.255 node3", - "2001:3984:3989::1:88{}4 unknown_host".format(node_num), + ("127.255.255.255", "node3"), + (f"2001:3984:3989::1:88{node_num}4", "unknown_host"), ], ) @@ -260,7 +258,7 @@ def test_user_access_ip_change(cluster_with_dns_cache_update, node): node4.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) # now wrong addresses are cached - set_hosts(node, []) + node.set_hosts([]) retry_count = 60 if node_name == "node5": # client is not allowed to connect, so execute it directly in container to send query from localhost @@ -298,7 +296,7 @@ def test_host_is_drop_from_cache_after_consecutive_failures( # Note that the list of hosts in variable since lost_host will be there too (and it's dropped and added back) # dns_update_short -> dns_max_consecutive_failures set to 6 assert node4.wait_for_log_line( - "Cannot resolve host \\(InvalidHostThatDoesNotExist\\), error 0: Host not found." + "Code: 198. DB::Exception: Not found address of host: InvalidHostThatDoesNotExist." ) assert node4.wait_for_log_line( "Cached hosts not found:.*InvalidHostThatDoesNotExist**", diff --git a/tests/integration/test_format_avro_confluent/secrets/password b/tests/integration/test_format_avro_confluent/secrets/password new file mode 100644 index 00000000000..a367925c806 --- /dev/null +++ b/tests/integration/test_format_avro_confluent/secrets/password @@ -0,0 +1,3 @@ +schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +schemauser/slash: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +complexschemauser: MD5:fcaeda86837fcd37755044e7258edc5d,user diff --git a/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf new file mode 100644 index 00000000000..7d0e6e2bf35 --- /dev/null +++ b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf @@ -0,0 +1,5 @@ +RealmFooBar { + org.eclipse.jetty.jaas.spi.PropertyFileLoginModule required + file="/etc/schema-registry/secrets/password" + debug="true"; +}; \ No newline at end of file diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 42b7ddce193..540f90ae05e 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -1,5 +1,6 @@ import io import logging +import time import avro.schema import pytest @@ -8,13 +9,14 @@ from confluent_kafka.avro.cached_schema_registry_client import ( ) from confluent_kafka.avro.serializer.message_serializer import MessageSerializer from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from urllib import parse @pytest.fixture(scope="module") def started_cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("dummy", with_kafka=True) + cluster.add_instance("dummy", with_kafka=True, with_secrets=True) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -40,14 +42,15 @@ def run_query(instance, query, data=None, settings=None): def test_select(started_cluster): # type: (ClickHouseCluster) -> None - schema_registry_client = CachedSchemaRegistryClient( - "http://localhost:{}".format(started_cluster.schema_registry_port) - ) + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port) + arg = {"url": reg_url} + + schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) schema = avro.schema.make_avsc_object( { - "name": "test_record", + "name": "test_record1", "type": "record", "fields": [{"name": "value", "type": "long"}], } @@ -56,14 +59,14 @@ def test_select(started_cluster): buf = io.BytesIO() for x in range(0, 3): message = serializer.encode_record_with_schema( - "test_subject", schema, {"value": x} + "test_subject1", schema, {"value": x} ) buf.write(message) data = buf.getvalue() instance = started_cluster.instances["dummy"] # type: ClickHouseInstance schema_registry_url = "http://{}:{}".format( - started_cluster.schema_registry_host, 8081 + started_cluster.schema_registry_host, started_cluster.schema_registry_port ) run_query(instance, "create table avro_data(value Int64) engine = Memory()") @@ -75,3 +78,164 @@ def test_select(started_cluster): ["1"], ["2"], ] + + +def test_select_auth(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + "schemauser", + "letmein", + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, "insert into avro_data_auth format AvroConfluent", data, settings + ) + stdout = run_query(instance, "select * from avro_data_auth") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] + + +def test_select_auth_encoded(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + parse.quote_plus("schemauser/slash"), + parse.quote_plus("letmein"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query( + instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()" + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "insert into avro_data_auth_encoded format AvroConfluent", + data, + settings, + ) + stdout = run_query(instance, "select * from avro_data_auth_encoded") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] + + +def test_select_auth_encoded_complex(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded_complex", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded_complex", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + parse.quote_plus("complexschemauser"), + parse.quote_plus("letmein%@:/"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query( + instance, + "create table avro_data_auth_encoded_complex(value Int64) engine = Memory()", + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "insert into avro_data_auth_encoded_complex format AvroConfluent", + data, + settings, + ) + stdout = run_query(instance, "select * from avro_data_auth_encoded_complex") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] diff --git a/tests/integration/test_http_failover/__init__.py b/tests/integration/test_http_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_http_failover/configs/listen.xml b/tests/integration/test_http_failover/configs/listen.xml new file mode 100644 index 00000000000..3abb37d5da2 --- /dev/null +++ b/tests/integration/test_http_failover/configs/listen.xml @@ -0,0 +1 @@ +:: diff --git a/tests/integration/test_http_failover/test.py b/tests/integration/test_http_failover/test.py new file mode 100644 index 00000000000..41b55ef635c --- /dev/null +++ b/tests/integration/test_http_failover/test.py @@ -0,0 +1,113 @@ +import pytest +from contextlib import nullcontext as does_not_raise +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException +from helpers.test_tools import exec_query_with_retry +from helpers.test_tools import assert_eq_with_retry + + +ACCESSIBLE_IPV4 = "10.5.172.10" +OTHER_ACCESSIBLE_IPV4 = "10.5.172.20" +NOT_ACCESSIBLE_IPV4 = "10.5.172.11" + +ACCESSIBLE_IPV6 = "2001:3984:3989::1:1000" +NOT_ACCESSIBLE_IPV6 = "2001:3984:3989::1:1001" + +DST_NODE_IPV4 = ACCESSIBLE_IPV4 +DST_NODE_IPV6 = ACCESSIBLE_IPV6 +SRC_NODE_IPV6 = "2001:3984:3989::1:2000" + + +cluster = ClickHouseCluster(__file__) + +# Destination node +dst_node = cluster.add_instance( + "dst_node", + with_zookeeper=True, + ipv4_address=DST_NODE_IPV4, + ipv6_address=DST_NODE_IPV6, + main_configs=["configs/listen.xml"], +) +# Source node +src_node = cluster.add_instance( + "src_node", + with_zookeeper=True, + ipv6_address=SRC_NODE_IPV6, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + pass + + +@pytest.fixture +def dst_node_addrs(started_cluster, request): + src_node.set_hosts([(ip, "dst_node") for ip in request.param]) + src_node.query("SYSTEM DROP DNS CACHE") + + yield + + # Clear static DNS entries + src_node.set_hosts([]) + src_node.query("SYSTEM DROP DNS CACHE") + + +@pytest.mark.parametrize( + "dst_node_addrs, expectation", + [ + ((ACCESSIBLE_IPV4, ACCESSIBLE_IPV6), does_not_raise()), + ((NOT_ACCESSIBLE_IPV4, ACCESSIBLE_IPV6), does_not_raise()), + ((ACCESSIBLE_IPV4, NOT_ACCESSIBLE_IPV6), does_not_raise()), + ( + (NOT_ACCESSIBLE_IPV4, NOT_ACCESSIBLE_IPV6), + pytest.raises(QueryRuntimeException), + ), + ], + indirect=["dst_node_addrs"], +) +def test_url_destination_host_with_multiple_addrs(dst_node_addrs, expectation): + with expectation: + result = src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + assert result == "42\n" + + +def test_url_invalid_hostname(started_cluster): + with pytest.raises(QueryRuntimeException): + src_node.query( + "SELECT count(*) FROM url('http://notvalidhost:8123/?query=SELECT+1', TSV, 'column1 UInt32');" + ) + + +def test_url_ip_change(started_cluster): + assert ( + src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + == "42\n" + ) + + started_cluster.restart_instance_with_ip_change(dst_node, OTHER_ACCESSIBLE_IPV4) + + # Ensure that only new IPV4 address is accessible + src_node.set_hosts( + [(OTHER_ACCESSIBLE_IPV4, "dst_node"), (NOT_ACCESSIBLE_IPV6, "dst_node")] + ) + src_node.query("SYSTEM DROP DNS CACHE") + + assert ( + src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + == "42\n" + ) diff --git a/tests/integration/test_keeper_disks/configs/enable_keeper.xml b/tests/integration/test_keeper_disks/configs/enable_keeper.xml index 5814979229c..50d0329637a 100644 --- a/tests/integration/test_keeper_disks/configs/enable_keeper.xml +++ b/tests/integration/test_keeper_disks/configs/enable_keeper.xml @@ -1,6 +1,10 @@ + + hdfs + hdfs://hdfs1:9000/ + local /var/lib/clickhouse/coordination/logs/ diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py index 11bb215be54..86682bcde01 100644 --- a/tests/integration/test_keeper_disks/test.py +++ b/tests/integration/test_keeper_disks/test.py @@ -9,7 +9,11 @@ import os CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, + with_minio=True, + with_hdfs=True, ) from kazoo.client import KazooClient, KazooState @@ -117,6 +121,12 @@ def get_local_snapshots(): return get_local_files("/var/lib/clickhouse/coordination/snapshots") +def test_supported_disk_types(started_cluster): + node.stop_clickhouse() + node.start_clickhouse() + node.contains_in_log("Disk type 'hdfs' is not supported for Keeper") + + def test_logs_with_disks(started_cluster): setup_local_storage(started_cluster) diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml index b7f9d1b058e..62109ee9092 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml index b773d59f259..2696c573180 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml index d4c2befc10f..fc0c0fd0300 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml index c039e709c9e..06f1c1d7195 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml @@ -1,5 +1,6 @@ + false 9181 4 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml index fb43b6524c8..5d3767ae969 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml @@ -1,5 +1,6 @@ + false 9181 5 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml index 430e662bf36..4d30822741a 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml @@ -1,5 +1,6 @@ + false 9181 6 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml index aa10774ad7d..b59141042ea 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml @@ -1,5 +1,6 @@ + false 9181 7 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml index 4f1c21a1084..711d70cb1ac 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml @@ -1,5 +1,6 @@ + false 9181 8 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml index eaf0f01afc9..abd4ef85bf7 100644 --- a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml deleted file mode 100644 index f41e8c6e49c..00000000000 --- a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - node4 - 9181 - - - node5 - 9181 - - - node6 - 9181 - - - node7 - 9181 - - - node8 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f7c3787b4d8..f630e5a422b 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -22,10 +22,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml index 441c1bc185d..94e59128bd3 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml index f0cb887b062..6367b4b4c29 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml @@ -1,5 +1,6 @@ + false 1 9181 1 @@ -11,6 +12,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml index e2e2c1fd7db..548d12c2e0a 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml index e2ac0400d88..65f9675cbd6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml deleted file mode 100644 index 384e984f210..00000000000 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 1c0d5e9a306..132c5488df6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -20,10 +20,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 6884ff29607..8ac7bc9b5e2 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -1,12 +1,7 @@ #!/usr/bin/env python3 - -#!/usr/bin/env python3 - import pytest from helpers.cluster import ClickHouseCluster -import random -import string import os import time from multiprocessing.dummy import Pool diff --git a/tests/integration/test_keeper_reconfig_add/__init__.py b/tests/integration/test_keeper_reconfig_add/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml new file mode 100644 index 00000000000..44e2090e9d8 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml @@ -0,0 +1,20 @@ + + + true + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml new file mode 100644 index 00000000000..e9249f7091c --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml @@ -0,0 +1,21 @@ + + + true + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml new file mode 100644 index 00000000000..a7ff1f6de28 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml @@ -0,0 +1,22 @@ + + + true + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + 3 node3 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/test.py b/tests/integration/test_keeper_reconfig_add/test.py new file mode 100644 index 00000000000..2c2da7403a1 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/test.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", stay_alive=True) +node3 = cluster.add_instance("node3", stay_alive=True) + +server_join_msg = "confirms it will join" +part_of_cluster = "now this node is the part of cluster" +zk1, zk2, zk3 = None, None, None + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node2.stop_clickhouse() + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper2.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + node3.stop_clickhouse() + node3.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper3.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def test_reconfig_add(started_cluster): + """ + Add a node to another node. Then add another node to two. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 1 + assert "node1" in config + assert "node2" not in config + assert "node3" not in config + + with pytest.raises(BadArgumentsException): + # duplicate id with different endpoint + zk1.reconfig(joining="server.1=localhost:1337", leaving=None, new_members=None) + + with pytest.raises(BadArgumentsException): + # duplicate endpoint + zk1.reconfig(joining="server.8=node1:9234", leaving=None, new_members=None) + + for i in range(100): + zk1.create(f"/test_three_{i}", b"somedata") + + node2.start_clickhouse() + config, _ = zk1.reconfig( + joining="server.2=node2:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node2) + + config = config.decode("utf-8") + print("After adding 2", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2 = get_fake_zk(node2) + ku.wait_configs_equal(config, zk2) + + for i in range(100): + assert zk2.exists(f"/test_three_{i}") is not None + zk2.create(f"/test_three_{100 + i}", b"somedata") + + # Why not both? + # One node will process add_srv request, other will pull out updated config, apply + # and return true in config update thread (without calling add_srv again) + assert node1.contains_in_log(server_join_msg) or node2.contains_in_log( + server_join_msg + ) + + assert node2.contains_in_log(part_of_cluster) + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_three_0") + + for i in range(200): + assert zk1.exists(f"/test_three_{i}") is not None + + for i in range(100): + zk2.create(f"/test_four_{i}", b"somedata") + + node3.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.3=node3:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node3) + + config = config.decode("utf-8") + print("After adding 3", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + zk3 = get_fake_zk(node3) + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk3.exists(f"/test_four_{i}") is not None + zk3.create(f"/test_four_{100 + i}", b"somedata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + + for i in range(200): + assert zk1.exists(f"/test_four_{i}") is not None + assert zk2.exists(f"/test_four_{i}") is not None + + assert node3.contains_in_log(part_of_cluster) diff --git a/tests/integration/test_keeper_reconfig_remove/__init__.py b/tests/integration/test_keeper_reconfig_remove/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml new file mode 100644 index 00000000000..bbadc2741af --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml @@ -0,0 +1,37 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml new file mode 100644 index 00000000000..0191a522a50 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml @@ -0,0 +1,37 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml new file mode 100644 index 00000000000..345bf402336 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml @@ -0,0 +1,37 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/test.py b/tests/integration/test_keeper_reconfig_remove/test.py new file mode 100644 index 00000000000..fb0a9472df3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/test.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3 = None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_followers_from_3(started_cluster): + """ + Remove 1 follower node from cluster of 3. + Then remove another follower from two left nodes. + Check that remaining node is in standalone mode. + """ + + zk1 = get_fake_zk(node1) + config, _ = zk1.get("/keeper/config") + config = config.decode("utf-8") + print("Initial config", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + with pytest.raises(BadVersionException): + zk1.reconfig(joining=None, leaving="1", new_members=None, from_config=20) + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining=None, leaving=None, new_members=None) + with pytest.raises(BadArgumentsException): + # bulk reconfiguration is not supported + zk1.reconfig(joining=None, leaving=None, new_members="3") + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining="1", leaving="1", new_members="3") + with pytest.raises(BadArgumentsException): + # at least one node must be left + zk1.reconfig(joining=None, leaving="1,2,3", new_members=None) + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + assert zk3.exists(f"test_two_{i}") is not None + + config, _ = zk1.reconfig(joining=None, leaving="3", new_members=None) + config = config.decode("utf-8") + print("After removing 3", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + ku.wait_configs_equal(config, zk2) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + zk2.create(f"/test_two_{100 + i}", b"otherdata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + + assert node3.contains_in_log(log_msg_removed) + + for i in range(100): + zk2.create(f"/test_two_{200 + i}", b"otherdata") + + config, _ = zk1.reconfig(joining=None, leaving="2", new_members=None) + config = config.decode("utf-8") + + print("After removing 2", config) + assert len(config.split("\n")) == 1 + assert "node1" in config + assert "node2" not in config + assert "node3" not in config + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(300): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert node2.contains_in_log(log_msg_removed) + assert "Mode: standalone" in zk1.command(b"stat") diff --git a/tests/integration/test_keeper_reconfig_remove_many/__init__.py b/tests/integration/test_keeper_reconfig_remove_many/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml new file mode 100644 index 00000000000..9976169624b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml @@ -0,0 +1,47 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml new file mode 100644 index 00000000000..edc43142464 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml @@ -0,0 +1,47 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml new file mode 100644 index 00000000000..8cebcbc0808 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml @@ -0,0 +1,47 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml new file mode 100644 index 00000000000..99ac7e53f30 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml @@ -0,0 +1,47 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml new file mode 100644 index 00000000000..92102ad486b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml @@ -0,0 +1,47 @@ + + + 9181 + 5 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/test.py b/tests/integration/test_keeper_reconfig_remove_many/test.py new file mode 100644 index 00000000000..ec0d8b95eff --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/test.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient, KazooState +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +nodes = [ + cluster.add_instance(f"node{i}", main_configs=[f"configs/keeper{i}.xml"]) + for i in range(1, 6) +] +node1, node2, node3, node4, node5 = nodes + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3, zk4, zk5 = None, None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3, zk4, zk5]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_2_and_leader(started_cluster): + """ + Remove 2 followers from a cluster of 5. Remove leader from 3 nodes. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 5 + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + ku.wait_configs_equal(config, zk4) + + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + ku.wait_configs_equal(config, zk5) + + for i in range(100): + assert zk4.exists(f"test_two_{i}") is not None + assert zk5.exists(f"test_two_{i}") is not None + + zk4.create(f"/test_two_{100 + i}", b"otherdata") + + zk2 = get_fake_zk(node2) + config, _ = zk2.reconfig(joining=None, leaving="4,5", new_members=None) + config = config.decode("utf-8") + + print("After removing 4,5", config) + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + assert "node5" not in config + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + ku.wait_configs_equal(config, zk1) + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + assert zk2.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk4.stop() + zk4.close() + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + + with pytest.raises(Exception): + zk5.stop() + zk5.close() + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) + assert node4.contains_in_log(log_msg_removed) + assert node5.contains_in_log(log_msg_removed) + + assert ku.is_leader(cluster, node1) + + for i in range(100): + zk1.create(f"/test_leader_{i}", b"somedata") + + # when a leader gets a remove request, it must yield leadership + config, _ = zk1.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + print("After removing 1 (leader)", config) + + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + assert "node5" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_leader_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_leader_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"test_leader_{i}") is not None + assert zk3.exists(f"test_leader_{i}") is not None + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_leader_0") + + assert node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) diff --git a/tests/integration/test_keeper_reconfig_replace_leader/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py new file mode 100644 index 00000000000..ca1ec3a0c92 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, "keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_replace_leader(started_cluster): + """ + Remove leader from a cluster of 3 and add a new node via two commands. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + for i in range(100): + zk1.create(f"/test_four_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"/test_four_{i}") is not None + assert zk3.exists(f"/test_four_{i}") is not None + + assert ku.is_leader(cluster, node1) + config, _ = zk2.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + + print("After removing 1 (leader)", config) + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + node4.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.4=node4:9234", leaving=None, new_members=None + ) + config = config.decode("utf-8") + ku.wait_until_connected(cluster, node4) + + print("After adding 4", config) + assert len(config.split("\n")) == 3 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" in config + + zk4 = get_fake_zk(node4) + ku.wait_configs_equal(config, zk4) + + for i in range(100): + assert zk4.exists(f"test_four_{i}") is not None + zk4.create(f"/test_four_{100 + i}", b"somedata") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(200): + assert zk2.exists(f"test_four_{i}") is not None + assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py new file mode 100644 index 00000000000..e23d0674c12 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, "keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_replace_leader_in_one_command(started_cluster): + """ + Remove leader from a cluster of 3 and add a new node to this cluster in a single command + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + for i in range(100): + zk1.create(f"/test_four_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"/test_four_{i}") is not None + assert zk3.exists(f"/test_four_{i}") is not None + + assert ku.is_leader(cluster, node1) + node4.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.4=node4:9234", leaving="1", new_members=None + ) + config = config.decode("utf-8") + + print("After removing 1 and adding 4", config) + assert len(config.split("\n")) == 3 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" in config + + ku.wait_until_connected(cluster, node4) + time.sleep(1) + + zk4 = get_fake_zk(node4) + zk4.sync("/test_four_0") + ku.wait_configs_equal(config, zk4) + + for i in range(100): + assert zk4.exists(f"test_four_{i}") is not None + zk4.create(f"/test_four_{100 + i}", b"somedata") + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(200): + assert zk2.exists(f"test_four_{i}") is not None + assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index e57057a8258..cd012ad6e9e 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -6,6 +6,7 @@ import socket import struct from kazoo.client import KazooClient +from kazoo.exceptions import NoNodeError # from kazoo.protocol.serialization import Connect, read_buffer, write_buffer @@ -162,17 +163,40 @@ def test_session_timeout(started_cluster): def test_session_close_shutdown(started_cluster): wait_nodes() - node1_zk = get_fake_zk(node1.name) - node2_zk = get_fake_zk(node2.name) + node1_zk = None + node2_zk = None + for i in range(20): + node1_zk = get_fake_zk(node1.name) + node2_zk = get_fake_zk(node2.name) - eph_node = "/test_node" - node2_zk.create(eph_node, ephemeral=True) - node1_zk.sync(eph_node) - assert node1_zk.exists(eph_node) != None + eph_node = "/test_node" + node2_zk.create(eph_node, ephemeral=True) + node1_zk.sync(eph_node) - # shutdown while session is active - node2.stop_clickhouse() + node1_zk.exists(eph_node) != None - assert node1_zk.exists(eph_node) == None + # restart while session is active so it's closed during shutdown + node2.restart_clickhouse() - node2.start_clickhouse() + if node1_zk.exists(eph_node) == None: + break + + assert node2.contains_in_log( + "Sessions cannot be closed during shutdown because there is no active leader" + ) + + try: + node1_zk.delete(eph_node) + except NoNodeError: + pass + + assert node1_zk.exists(eph_node) == None + + destroy_zk_client(node1_zk) + node1_zk = None + destroy_zk_client(node2_zk) + node2_zk = None + + time.sleep(1) + else: + assert False, "Session wasn't properly cleaned up on shutdown" diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 0bc24268040..382539df7de 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -39,257 +39,261 @@ def test_lost_part_same_replica(start_cluster): node1.query("DROP TABLE IF EXISTS mt0 SYNC") node2.query("DROP TABLE IF EXISTS mt0 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" - ) + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) - node1.query("SYSTEM STOP MERGES mt0") - node2.query("SYSTEM STOP REPLICATION QUEUES") + node1.query("SYSTEM STOP MERGES mt0") + node2.query("SYSTEM STOP REPLICATION QUEUES") - for i in range(5): - node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))") + for i in range(5): + node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))") - for i in range(20): - parts_to_merge = node1.query( - "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0" - ) - if parts_to_merge: - parts_list = list(sorted(ast.literal_eval(parts_to_merge))) - print("Got parts list", parts_list) - if len(parts_list) < 3: - raise Exception(f"Got too small parts list {parts_list}") - break - time.sleep(1) + for i in range(20): + parts_to_merge = node1.query( + "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0" + ) + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception(f"Got too small parts list {parts_list}") + break + time.sleep(1) - victim_part_from_the_middle = random.choice(parts_list[1:-1]) - print("Will corrupt part", victim_part_from_the_middle) + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) - remove_part_from_disk(node1, "mt0", victim_part_from_the_middle) + remove_part_from_disk(node1, "mt0", victim_part_from_the_middle) - node1.query("DETACH TABLE mt0") + node1.query("DETACH TABLE mt0") - node1.query("ATTACH TABLE mt0") + node1.query("ATTACH TABLE mt0") - node1.query("SYSTEM START MERGES mt0") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0") - print("result: ", res) - print("error: ", res) + node1.query("SYSTEM START MERGES mt0") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0") + print("result: ", res) + print("error: ", res) - for i in range(10): - result = node1.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node1.query( - "SELECT count() FROM system.replication_queue FORMAT Vertical" - ) + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query( + "SELECT count() FROM system.replication_queue FORMAT Vertical" + ) - assert node1.contains_in_log( - "Created empty part" - ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" + assert node1.contains_in_log( + "Created empty part" + ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" - assert node1.query("SELECT COUNT() FROM mt0") == "4\n" + assert node1.query("SELECT COUNT() FROM mt0") == "4\n" - node2.query("SYSTEM START REPLICATION QUEUES") + node2.query("SYSTEM START REPLICATION QUEUES") - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt0 SYNC") - node2.query("DROP TABLE IF EXISTS mt0 SYNC") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt0 SYNC") + node2.query("DROP TABLE IF EXISTS mt0 SYNC") def test_lost_part_other_replica(start_cluster): node1.query("DROP TABLE IF EXISTS mt1 SYNC") node2.query("DROP TABLE IF EXISTS mt1 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt1") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(5): + node1.query(f"INSERT INTO mt1 VALUES ({i})") + + for i in range(20): + parts_to_merge = node1.query( + "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0" + ) + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception("Got too small parts list {}".format(parts_list)) + break + time.sleep(1) + + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) + + remove_part_from_disk(node1, "mt1", victim_part_from_the_middle) + + # other way to detect broken parts + node1.query("CHECK TABLE mt1") + + node2.query("SYSTEM START REPLICATION QUEUES") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1") + print("result: ", res) + print("error: ", res) + + for i in range(10): + result = node2.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node2.query( + "SELECT * FROM system.replication_queue FORMAT Vertical" + ) + + assert node1.contains_in_log( + "Created empty part" + ), "Seems like empty part {} is not created or log message changed".format( + victim_part_from_the_middle ) - node1.query("SYSTEM STOP MERGES mt1") - node2.query("SYSTEM STOP REPLICATION QUEUES") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - for i in range(5): - node1.query(f"INSERT INTO mt1 VALUES ({i})") + node1.query("SYSTEM START MERGES mt1") - for i in range(20): - parts_to_merge = node1.query( - "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0" - ) - if parts_to_merge: - parts_list = list(sorted(ast.literal_eval(parts_to_merge))) - print("Got parts list", parts_list) - if len(parts_list) < 3: - raise Exception("Got too small parts list {}".format(parts_list)) - break - time.sleep(1) - - victim_part_from_the_middle = random.choice(parts_list[1:-1]) - print("Will corrupt part", victim_part_from_the_middle) - - remove_part_from_disk(node1, "mt1", victim_part_from_the_middle) - - # other way to detect broken parts - node1.query("CHECK TABLE mt1") - - node2.query("SYSTEM START REPLICATION QUEUES") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1") - print("result: ", res) - print("error: ", res) - - for i in range(10): - result = node2.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node2.query( - "SELECT * FROM system.replication_queue FORMAT Vertical" - ) - - assert node1.contains_in_log( - "Created empty part" - ), "Seems like empty part {} is not created or log message changed".format( - victim_part_from_the_middle - ) - - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("SYSTEM START MERGES mt1") - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt1 SYNC") - node2.query("DROP TABLE IF EXISTS mt1 SYNC") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt1 SYNC") + node2.query("DROP TABLE IF EXISTS mt1 SYNC") def test_lost_part_mutation(start_cluster): node1.query("DROP TABLE IF EXISTS mt2 SYNC") node2.query("DROP TABLE IF EXISTS mt2 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt2") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(2): + node1.query(f"INSERT INTO mt2 VALUES ({i})") + + node1.query( + "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - node1.query("SYSTEM STOP MERGES mt2") - node2.query("SYSTEM STOP REPLICATION QUEUES") + for i in range(20): + parts_to_mutate = node1.query( + "SELECT count() FROM system.replication_queue WHERE table='mt2'" + ) + # two mutations for both replicas + if int(parts_to_mutate) == 4: + break + time.sleep(1) - for i in range(2): - node1.query(f"INSERT INTO mt2 VALUES ({i})") + remove_part_from_disk(node1, "mt2", "all_1_1_0") - node1.query( - "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} - ) + # other way to detect broken parts + node1.query("CHECK TABLE mt2") - for i in range(20): - parts_to_mutate = node1.query( - "SELECT count() FROM system.replication_queue WHERE table='mt2'" - ) - # two mutations for both replicas - if int(parts_to_mutate) == 4: - break - time.sleep(1) + node1.query("SYSTEM START MERGES mt2") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2") + print("result: ", res) + print("error: ", res) - remove_part_from_disk(node1, "mt2", "all_1_1_0") + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query( + "SELECT * FROM system.replication_queue FORMAT Vertical" + ) - # other way to detect broken parts - node1.query("CHECK TABLE mt2") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - node1.query("SYSTEM START MERGES mt2") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2") - print("result: ", res) - print("error: ", res) + node2.query("SYSTEM START REPLICATION QUEUES") - for i in range(10): - result = node1.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node1.query( - "SELECT * FROM system.replication_queue FORMAT Vertical" - ) - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1") - assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node2.query("SYSTEM START REPLICATION QUEUES") - - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1") - assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt2 SYNC") - node2.query("DROP TABLE IF EXISTS mt2 SYNC") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt2 SYNC") + node2.query("DROP TABLE IF EXISTS mt2 SYNC") def test_lost_last_part(start_cluster): node1.query("DROP TABLE IF EXISTS mt3 SYNC") node2.query("DROP TABLE IF EXISTS mt3 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " - "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " + "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt3") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(1): + node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')") + + # actually not important + node1.query( + "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - node1.query("SYSTEM STOP MERGES mt3") - node2.query("SYSTEM STOP REPLICATION QUEUES") + partition_id = node1.query("select partitionId('x')").strip() + remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") - for i in range(1): - node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')") + # other way to detect broken parts + node1.query("CHECK TABLE mt3") - # actually not important - node1.query( - "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} - ) + node1.query("SYSTEM START MERGES mt3") - partition_id = node1.query("select partitionId('x')").strip() - remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") + for i in range(100): + result = node1.query( + "SELECT count() FROM system.replication_queue WHERE table='mt3'" + ) + assert int(result) <= 2, "Have a lot of entries in queue {}".format( + node1.query("SELECT * FROM system.replication_queue FORMAT Vertical") + ) + if node1.contains_in_log( + "Cannot create empty part" + ) and node1.contains_in_log("DROP/DETACH PARTITION"): + break + if node1.contains_in_log( + "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 " + ): + break + time.sleep(0.5) + else: + assert False, "Don't have required messages in node1 log" - # other way to detect broken parts - node1.query("CHECK TABLE mt3") + node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'") - node1.query("SYSTEM START MERGES mt3") - - for i in range(10): - result = node1.query( - "SELECT count() FROM system.replication_queue WHERE table='mt3'" - ) - assert int(result) <= 2, "Have a lot of entries in queue {}".format( - node1.query("SELECT * FROM system.replication_queue FORMAT Vertical") - ) - if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log( - "DROP/DETACH PARTITION" - ): - break - if node1.contains_in_log( - "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 " - ): - break - time.sleep(1) - else: - assert False, "Don't have required messages in node1 log" - - node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'") - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt3 SYNC") - node2.query("DROP TABLE IF EXISTS mt3 SYNC") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt3 SYNC") + node2.query("DROP TABLE IF EXISTS mt3 SYNC") diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8cf9e67bf63..bc64f6bff64 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -992,6 +992,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name): ) check_query(clickhouse_node, "SHOW TABLES FROM db FORMAT TSV", "t\n") clickhouse_node.query("SYSTEM STOP MERGES db.t") + clickhouse_node.query("DROP VIEW IF EXISTS v") clickhouse_node.query("CREATE VIEW v AS SELECT * FROM db.t") mysql_node.query("INSERT INTO db.t VALUES (1, 1), (2, 2)") mysql_node.query("DELETE FROM db.t WHERE a = 2;") @@ -2336,3 +2337,32 @@ def named_collections(clickhouse_node, mysql_node, service_name): ) clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def create_table_as_select(clickhouse_node, mysql_node, service_name): + db = "create_table_as_select" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializeMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + mysql_node.query( + f"CREATE TABLE {db}.t1(a INT NOT NULL PRIMARY KEY) ENGINE = InnoDB" + ) + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (1)") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\n", + ) + + mysql_node.query(f"CREATE TABLE {db}.t2(PRIMARY KEY(a)) AS SELECT * FROM {db}.t1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 21316d1a474..f227c19e6b8 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -529,3 +529,9 @@ def test_named_collections(started_cluster, started_mysql_8_0, clickhouse_node): materialized_with_ddl.named_collections( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_create_table_as_select(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.create_table_as_select( + clickhouse_node, started_mysql_8_0, "mysql80" + ) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml new file mode 100644 index 00000000000..56fc5ed34ca --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..6ab971fa9c4 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,37 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 14f5de17870..e8b3ba3fcf3 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -582,75 +582,83 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" - ) - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") - node1.exec_in_container( - ["ss", "-K", "dport", "postgresql"], privileged=True, user="root" - ) - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", - "hello", - ) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", - "world", - ) - cursor.execute("truncate table clickhouse.test_table") + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") + node1.exec_in_container( + ["ss", "-K", "dport", "postgresql"], privileged=True, user="root" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", + "hello", + ) + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", + "world", + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')") - for i in range(100): - try: - node1.query("system reload dictionary postgres_odbc_hashed", timeout=15) - except Exception as ex: - assert False, "Exception occured -- odbc-bridge hangs: " + str(ex) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')") + for i in range(100): + try: + node1.query("system reload dictionary postgres_odbc_hashed", timeout=15) + except Exception as ex: + assert False, "Exception occured -- odbc-bridge hangs: " + str(ex) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))", - "xxx", - ) - cursor.execute("truncate table clickhouse.test_table") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))", + "xxx", + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_no_connection_pooling(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" - ) - node1.exec_in_container(["ss", "-K", "dport", "5432"], privileged=True, user="root") - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool") - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))", - "hello", - ) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))", - "world", - ) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" + ) + node1.exec_in_container( + ["ss", "-K", "dport", "5432"], privileged=True, user="root" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))", + "hello", + ) + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))", + "world", + ) - # No open connections should be left because we don't use connection pooling. - assert "" == node1.exec_in_container( - ["ss", "-H", "dport", "5432"], privileged=True, user="root" - ) - cursor.execute("truncate table clickhouse.test_table") + # No open connections should be left because we don't use connection pooling. + assert "" == node1.exec_in_container( + ["ss", "-H", "dport", "5432"], privileged=True, user="root" + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_postgres_insert(started_cluster): @@ -662,112 +670,119 @@ def test_postgres_insert(started_cluster): # postgres .yml file). This is needed to check parsing, validation and # reconstruction of connection string. - node1.query( - "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" - ) - node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')") - assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n" - node1.query( - "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test" - ) - node1.query( - "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" - " select number, number, 's' || toString(number) from numbers (4, 7)" - ) - assert ( - node1.query("select sum(column1), count(column1) from pg_insert") == "55\t10\n" - ) - assert ( + try: node1.query( - "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))" + "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" ) - == "55\t10\n" - ) - node1.query("DROP TABLE pg_insert") - conn.cursor().execute("truncate table clickhouse.test_table") + node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')") + assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n" + node1.query( + "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test" + ) + node1.query( + "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" + " select number, number, 's' || toString(number) from numbers (4, 7)" + ) + assert ( + node1.query("select sum(column1), count(column1) from pg_insert") + == "55\t10\n" + ) + assert ( + node1.query( + "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))" + ) + == "55\t10\n" + ) + finally: + node1.query("DROP TABLE IF EXISTS pg_insert") + conn.cursor().execute("truncate table clickhouse.test_table") def test_odbc_postgres_date_data_type(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)" - ) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)" + ) - cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')") - cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')") - cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')") - conn.commit() + cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')") + conn.commit() - node1.query( - """ - CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date) - ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')""" - ) + node1.query( + """ + CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')""" + ) - expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n" - result = node1.query("SELECT * FROM test_date") - assert result == expected - cursor.execute("DROP TABLE clickhouse.test_date") - node1.query("DROP TABLE test_date") + expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n" + result = node1.query("SELECT * FROM test_date") + assert result == expected + finally: + cursor.execute("DROP TABLE clickhouse.test_date") + node1.query("DROP TABLE IF EXISTS test_date") def test_odbc_postgres_conversions(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() - cursor.execute( - """CREATE TABLE clickhouse.test_types ( - a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, - h timestamp)""" - ) + cursor.execute( + """CREATE TABLE clickhouse.test_types ( + a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, + h timestamp)""" + ) - node1.query( - """ - INSERT INTO TABLE FUNCTION - odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') - VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')""" - ) + node1.query( + """ + INSERT INTO TABLE FUNCTION + odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')""" + ) - result = node1.query( - """ - SELECT a, b, c, d, e, f, g, h - FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') - """ - ) + result = node1.query( + """ + SELECT a, b, c, d, e, f, g, h + FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + """ + ) - assert ( - result - == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n" - ) - cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + assert ( + result + == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n" + ) + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") - cursor.execute( - """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)""" - ) + cursor.execute( + """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)""" + ) - node1.query( - """ - CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1)) - ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')""" - ) + node1.query( + """ + CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1)) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')""" + ) - node1.query( - """INSERT INTO test_types - SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)""" - ) + node1.query( + """INSERT INTO test_types + SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)""" + ) - expected = node1.query( - "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)" - ) - result = node1.query("SELECT * FROM test_types") - cursor.execute("DROP TABLE clickhouse.test_types") - node1.query("DROP TABLE test_types") - assert result == expected + expected = node1.query( + "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)" + ) + result = node1.query("SELECT * FROM test_types") + assert result == expected + finally: + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + node1.query("DROP TABLE IF EXISTS test_types") def test_odbc_cyrillic_with_varchar(started_cluster): diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index 377b1c89efc..57ee920d49c 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -46,7 +46,12 @@ pg_manager = PostgresManager() def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="postgres_database", + ) yield cluster finally: @@ -74,16 +79,10 @@ def test_load_and_sync_all_database_tables(started_cluster): def test_replicating_dml(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table(cursor, "postgresql_replica_{}".format(i)) + pg_manager.create_postgres_table(f"postgresql_replica_{i}") instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format( i, i @@ -96,39 +95,29 @@ def test_replicating_dml(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)".format( - i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 50 + number, {i} from numbers(1000)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;".format( - i, i, i - ) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} WHERE key < 50;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;".format( - i, i, i, i - ) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} * {i} WHERE key >= 50;" ) + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;".format( - i, i - ) + pg_manager.execute( + f"DELETE FROM postgresql_replica_{i} WHERE (value*value + {i}) % 2 = 0;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;".format( - i - ) - ) - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;".format(i, i) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = value - (value % 7) WHERE key > 128 AND key < 512;" ) + pg_manager.execute(f"DELETE FROM postgresql_replica_{i} WHERE key % 7 = 1;") + check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -288,13 +277,7 @@ def test_load_and_sync_subset_of_database_tables(started_cluster): def test_changing_replica_identity_value(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") + pg_manager.create_postgres_table("postgresql_replica") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)" ) @@ -307,7 +290,7 @@ def test_changing_replica_identity_value(started_cluster): "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)" ) check_tables_are_synchronized(instance, "postgresql_replica") - cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") + pg_manager.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") check_tables_are_synchronized(instance, "postgresql_replica") @@ -331,18 +314,13 @@ def test_clickhouse_restart(started_cluster): def test_replica_identity_index(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, + pg_manager.create_postgres_table( + "postgresql_replica", template=postgres_table_template_3 ) - cursor = conn.cursor() - - create_postgres_table( - cursor, "postgresql_replica", template=postgres_table_template_3 + pg_manager.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") + pg_manager.execute( + "ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx" ) - cursor.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") - cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)" ) @@ -355,35 +333,29 @@ def test_replica_identity_index(started_cluster): ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") - cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") + pg_manager.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") + pg_manager.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") + pg_manager.execute( + "UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 " + ) + pg_manager.execute( + "UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 " + ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("DELETE FROM postgresql_replica WHERE key2<75;") + pg_manager.execute("DELETE FROM postgresql_replica WHERE key2<75;") check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") def test_table_schema_changes(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table( - cursor, - "postgresql_replica_{}".format(i), - template=postgres_table_template_2, + pg_manager.create_postgres_table( + f"postgresql_replica_{i}", template=postgres_table_template_2 ) instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)".format( - i, i, i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT number, {i}, {i}, {i} from numbers(25)" ) pg_manager.create_materialized_db( @@ -393,9 +365,7 @@ def test_table_schema_changes(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)".format( - i, i, i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 25 + number, {i}, {i}, {i} from numbers(25)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -410,9 +380,9 @@ def test_table_schema_changes(started_cluster): instance.query(f"SELECT count() FROM test_database.{altered_table}") ) - cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2") + pg_manager.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2") for i in range(NUM_TABLES): - cursor.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})") + pg_manager.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})") assert instance.wait_for_log_line( f"Table postgresql_replica_{altered_idx} is skipped from replication stream" @@ -444,10 +414,7 @@ def test_many_concurrent_queries(started_cluster): port=started_cluster.postgres_port, database=True, ) - cursor = conn.cursor() - pg_manager.create_and_fill_postgres_tables_from_cursor( - cursor, NUM_TABLES, numbers=10000 - ) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000) def attack(thread_id): print("thread {}".format(thread_id)) @@ -589,9 +556,8 @@ def test_multiple_databases(started_cluster): port=started_cluster.postgres_port, database=False, ) - cursor = conn.cursor() - pg_manager.create_postgres_db(cursor, "postgres_database_1") - pg_manager.create_postgres_db(cursor, "postgres_database_2") + pg_manager.create_postgres_db("postgres_database_1") + pg_manager.create_postgres_db("postgres_database_2") conn1 = get_postgres_conn( ip=started_cluster.postgres_ip, @@ -610,15 +576,13 @@ def test_multiple_databases(started_cluster): cursor2 = conn2.cursor() pg_manager.create_clickhouse_postgres_db( - cluster.postgres_ip, - cluster.postgres_port, "postgres_database_1", + "", "postgres_database_1", ) pg_manager.create_clickhouse_postgres_db( - cluster.postgres_ip, - cluster.postgres_port, "postgres_database_2", + "", "postgres_database_2", ) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml new file mode 100644 index 00000000000..4bc63453f55 --- /dev/null +++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml @@ -0,0 +1,5 @@ + + + 5 + + diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 90d19e9532c..3f2ec74180b 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -46,14 +46,32 @@ instance = cluster.add_instance( stay_alive=True, ) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/log_conf.xml", "configs/merge_tree_too_many_parts.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, + stay_alive=True, +) + + pg_manager = PostgresManager() +pg_manager2 = PostgresManager() @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="postgres_database", + ) + pg_manager2.init( + instance2, cluster.postgres_ip, cluster.postgres_port, "postgres_database2" + ) yield cluster finally: @@ -68,11 +86,9 @@ def setup_teardown(): def test_add_new_table_to_replication(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute("DROP TABLE IF EXISTS test_table") NUM_TABLES = 5 - pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, 10000) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) @@ -85,7 +101,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_5" - pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name) + pg_manager.create_and_fill_postgres_table(table_name) result = instance.query("SHOW CREATE DATABASE test_database") assert ( @@ -138,7 +154,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_6" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -149,7 +165,7 @@ def test_add_new_table_to_replication(started_cluster): instance.restart_clickhouse() table_name = "postgresql_replica_7" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -251,8 +267,7 @@ def test_remove_table_from_replication(started_cluster): == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n" ) - cursor = pg_manager.get_db_cursor() - cursor.execute(f"drop table if exists postgresql_replica_0;") + pg_manager.execute(f"drop table if exists postgresql_replica_0;") # Removing from replication table which does not exist in PostgreSQL must be ok. instance.query("DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY") @@ -262,10 +277,11 @@ def test_remove_table_from_replication(started_cluster): def test_predefined_connection_configuration(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" ) @@ -312,10 +328,9 @@ def test_database_with_single_non_default_schema(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -347,7 +362,7 @@ def test_database_with_single_non_default_schema(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format( altered_table ) @@ -414,10 +429,9 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -452,7 +466,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format( altered_table ) @@ -530,10 +544,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): clickhouse_postgres_db = f"clickhouse_postgres_db{i}" create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" @@ -566,7 +579,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): altered_schema = random.randint(0, schemas_num - 1) altered_table = random.randint(0, NUM_TABLES - 1) clickhouse_postgres_db = f"clickhouse_postgres_db{altered_schema}" - cursor.execute( + pg_manager.execute( f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer" ) @@ -599,10 +612,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): def test_table_override(started_cluster): - cursor = pg_manager.get_db_cursor() table_name = "table_override" materialized_database = "test_database" - create_postgres_table(cursor, table_name, template=postgres_table_template_5) + pg_manager.create_postgres_table(table_name, template=postgres_table_template_5) instance.query( f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})" ) @@ -629,10 +641,11 @@ def test_table_override(started_cluster): def test_materialized_view(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query("DROP DATABASE IF EXISTS test_database") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" @@ -643,12 +656,69 @@ def test_materialized_view(started_cluster): "CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table" ) assert "1\t2" == instance.query("SELECT * FROM mv").strip() - cursor.execute(f"INSERT INTO test_table SELECT 3, 4") + pg_manager.execute(f"INSERT INTO test_table SELECT 3, 4") check_tables_are_synchronized(instance, "test_table") assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip() pg_manager.drop_materialized_db() +def test_too_many_parts(started_cluster): + table = "test_table" + pg_manager2.create_and_fill_postgres_table(table) + pg_manager2.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = 'test_table', materialized_postgresql_backoff_min_ms = 100, materialized_postgresql_backoff_max_ms = 100" + ], + ) + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + assert ( + "50" == instance2.query("SELECT count() FROM test_database.test_table").strip() + ) + + instance2.query("SYSTEM STOP MERGES") + num = 50 + for i in range(10): + instance2.query( + f""" + INSERT INTO {pg_manager2.get_default_database()}.test_table SELECT {num}, {num}; + """ + ) + num = num + 1 + for i in range(30): + if num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or instance2.contains_in_log("DB::Exception: Too many parts"): + break + time.sleep(1) + print(f"wait sync try {i}") + instance2.query("SYSTEM FLUSH LOGS") + if instance2.contains_in_log("DB::Exception: Too many parts"): + break + assert num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or num - 1 == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) + + assert instance2.contains_in_log("DB::Exception: Too many parts") + print(num) + assert num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or num - 1 == int(instance2.query("SELECT count() FROM test_database.test_table")) + + instance2.query("SYSTEM START MERGES") + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + + # assert "200" == instance.query("SELECT count FROM test_database.test_table").strip() + pg_manager2.drop_materialized_db() + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index b3ba8d4737f..ed034a326da 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -132,14 +132,15 @@ def test_create_replicated_table(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_simple_alter_table(started_cluster, engine): + database = f"test_simple_alter_table_{engine}" main_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) # test_simple_alter_table - name = "test_simple_alter_table.alter_test_{}".format(engine) + name = f"{database}.alter_test" main_node.query( "CREATE TABLE {} " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " @@ -187,10 +188,9 @@ def test_simple_alter_table(started_cluster, engine): # test_create_replica_after_delay competing_node.query( - "CREATE DATABASE IF NOT EXISTS test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica3');" + f"CREATE DATABASE IF NOT EXISTS {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica3');" ) - name = "test_simple_alter_table.alter_test_{}".format(engine) main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name)) @@ -210,21 +210,23 @@ def test_simple_alter_table(started_cluster, engine): ) assert_create_query([main_node, dummy_node, competing_node], name, expected) - main_node.query("DROP DATABASE test_simple_alter_table SYNC") - dummy_node.query("DROP DATABASE test_simple_alter_table SYNC") - competing_node.query("DROP DATABASE test_simple_alter_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") + competing_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_delete_from_table(started_cluster, engine): + database = f"delete_from_table_{engine}" + main_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - name = "delete_from_table.delete_test_{}".format(engine) + name = f"{database}.delete_test" main_node.query( "CREATE TABLE {} " "(id UInt64, value String) " @@ -241,7 +243,7 @@ def test_delete_from_table(started_cluster, engine): table_for_select = name if not "Replicated" in engine: - table_for_select = "cluster('delete_from_table', {})".format(name) + table_for_select = f"cluster('{database}', {name})" for node in [main_node, dummy_node]: assert_eq_with_retry( node, @@ -249,8 +251,8 @@ def test_delete_from_table(started_cluster, engine): expected, ) - main_node.query("DROP DATABASE delete_from_table SYNC") - dummy_node.query("DROP DATABASE delete_from_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") def get_table_uuid(database, name): @@ -278,18 +280,18 @@ def fixture_attachable_part(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_attach(started_cluster, attachable_part, engine): + database = f"alter_attach_{engine}" main_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - name = "alter_attach_test_{}".format(engine) main_node.query( - f"CREATE TABLE alter_attach.{name} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_attach_test (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - table_uuid = get_table_uuid("alter_attach", name) + table_uuid = get_table_uuid(database, "alter_attach_test") # Provide and attach a part to the main node shutil.copytree( attachable_part, @@ -298,146 +300,157 @@ def test_alter_attach(started_cluster, attachable_part, engine): f"database/store/{table_uuid[:3]}/{table_uuid}/detached/all_1_1_0", ), ) - main_node.query(f"ALTER TABLE alter_attach.{name} ATTACH PART 'all_1_1_0'") + main_node.query(f"ALTER TABLE {database}.alter_attach_test ATTACH PART 'all_1_1_0'") # On the main node, data is attached - assert main_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert ( + main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) # On the other node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) else: - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "" - main_node.query("DROP DATABASE alter_attach SYNC") - dummy_node.query("DROP DATABASE alter_attach SYNC") + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "" + ) + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_part(started_cluster, engine): + database = f"alter_drop_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_part (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_drop_part.{table} DROP PART '{part_name}'") - assert main_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + dummy_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_drop_part DROP PART '{part_name}'") + assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": # The DROP operation is still replicated at the table engine level - assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" + ) else: assert ( - dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "456\n" ) - main_node.query("DROP DATABASE alter_drop_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_detach_part(started_cluster, engine): + database = f"alter_detach_part_{engine}" main_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_detach_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_detach_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_detach (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_detach VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_detach_part.{table} DETACH PART '{part_name}'") - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_detach_part' AND table='{table}'" + dummy_node.query(f"INSERT INTO {database}.alter_detach VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_detach DETACH PART '{part_name}'") + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_detach'" assert main_node.query(detached_parts_query) == f"{part_name}\n" if engine == "ReplicatedMergeTree": # The detach operation is still replicated at the table engine level assert dummy_node.query(detached_parts_query) == f"{part_name}\n" else: assert dummy_node.query(detached_parts_query) == "" - main_node.query("DROP DATABASE alter_detach_part SYNC") - dummy_node.query("DROP DATABASE alter_detach_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_detached_part(started_cluster, engine): + database = f"alter_drop_detached_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_detached_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_detached_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_detached (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (123)") main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (456)") + dummy_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (456)") dummy_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DROP DETACHED PART '{part_name}'" ) - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_drop_detached_part' AND table='{table}'" + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_drop_detached'" assert main_node.query(detached_parts_query) == "" assert dummy_node.query(detached_parts_query) == f"{part_name}\n" - main_node.query("DROP DATABASE alter_drop_detached_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_detached_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_partition(started_cluster, engine): + database = f"alter_drop_partition_{engine}" main_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) snapshotting_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - table = f"alter_drop_partition.alter_drop_{engine}" main_node.query( - f"CREATE TABLE {table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO {table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO {table} VALUES (456)") - snapshotting_node.query(f"INSERT INTO {table} VALUES (789)") + dummy_node.query(f"INSERT INTO {database}.alter_drop VALUES (456)") + snapshotting_node.query(f"INSERT INTO {database}.alter_drop VALUES (789)") main_node.query( - f"ALTER TABLE {table} ON CLUSTER alter_drop_partition DROP PARTITION ID 'all'", + f"ALTER TABLE {database}.alter_drop ON CLUSTER {database} DROP PARTITION ID 'all'", settings={"replication_alter_partitions_sync": 2}, ) assert ( main_node.query( - f"SELECT CounterID FROM clusterAllReplicas('alter_drop_partition', {table})" + f"SELECT CounterID FROM clusterAllReplicas('{database}', {database}.alter_drop)" ) == "" ) - assert dummy_node.query(f"SELECT CounterID FROM {table}") == "" - main_node.query("DROP DATABASE alter_drop_partition") - dummy_node.query("DROP DATABASE alter_drop_partition") - snapshotting_node.query("DROP DATABASE alter_drop_partition") + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop") == "" + main_node.query(f"DROP DATABASE {database}") + dummy_node.query(f"DROP DATABASE {database}") + snapshotting_node.query(f"DROP DATABASE {database}") def test_alter_fetch(started_cluster): diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py index 05d7bbb7282..25d30eb9c82 100644 --- a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py @@ -67,6 +67,8 @@ def optimize_table(): def check_table(): expected = [[1, "str1"], [2, "str2"]] + node1.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") + node2.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") assert node1.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) assert node2.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) assert node1.query("CHECK TABLE tbl") == "1\n" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 5ac1d3bea6f..8ff88791a3a 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -85,6 +85,32 @@ def test_read_write_storage_with_globs(started_cluster): assert "in readonly mode" in str(ex) +def test_storage_with_multidirectory_glob(started_cluster): + hdfs_api = started_cluster.hdfs_api + for i in ["1", "2"]: + hdfs_api.write_data( + f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n" + ) + assert ( + hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") + == f"File{i}\t{i}{i}\n" + ) + + r = node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)" + ) + assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n") + + try: + node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)" + ) + assert False, "Exception have to be thrown" + except Exception as ex: + print(ex) + assert "no files" in str(ex) + + def test_read_write_table(started_cluster): hdfs_api = started_cluster.hdfs_api diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9a6d3e0513c..d0686c7c36f 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -762,7 +762,7 @@ def test_kafka_formats(kafka_cluster): ), ], "extra_settings": ", format_avro_schema_registry_url='http://{}:{}'".format( - kafka_cluster.schema_registry_host, 8081 + kafka_cluster.schema_registry_host, kafka_cluster.schema_registry_port ), "supports_empty_value": True, }, @@ -4339,7 +4339,7 @@ def test_row_based_formats(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -4347,10 +4347,10 @@ def test_row_based_formats(kafka_cluster): kafka_group_name = '{format_name}', kafka_format = '{format_name}', kafka_max_rows_per_message = 5; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); """ ) @@ -4459,17 +4459,17 @@ def test_block_based_formats_2(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = '{format_name}', kafka_group_name = '{format_name}', kafka_format = '{format_name}'; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; """ ) diff --git a/tests/integration/test_temporary_data/__init__.py b/tests/integration/test_temporary_data/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_temporary_data/test.py b/tests/integration/test_temporary_data/test.py new file mode 100644 index 00000000000..9228da0698f --- /dev/null +++ b/tests/integration/test_temporary_data/test.py @@ -0,0 +1,57 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +import time + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_tmp_data_no_leftovers(start_cluster): + q = node.get_query_request + + settings = { + "max_bytes_before_external_group_by": "10K", + "max_bytes_before_external_sort": "10K", + "join_algorithm": "grace_hash", + "max_bytes_in_join": "10K", + "grace_hash_join_initial_buckets": "16", + } + + # Run some queries in the background to generate temporary data + q( + "SELECT ignore(*) FROM numbers(10 * 1024 * 1024) ORDER BY sipHash64(number)", + settings=settings, + ) + q("SELECT * FROM system.numbers GROUP BY ALL", settings=settings) + q( + "SELECT * FROM system.numbers as t1 JOIN system.numbers as t2 USING (number)", + settings=settings, + ) + + # Wait a bit to make sure the temporary data is written to disk + time.sleep(5) + + # Hard restart the node + node.restart_clickhouse(kill=True) + path_to_data = "/var/lib/clickhouse/" + + # Check that there are no temporary files left + result = node.exec_in_container(["ls", path_to_data + "tmp/"]) + assert result == "" diff --git a/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh b/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh index 6797e9cac12..16ad08deeb2 100755 --- a/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh @@ -36,7 +36,7 @@ ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r1" ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r2" # Should be empty, but in case of problems we will see some diagnostics -${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.replication_queue WHERE table like 'kill_mutation_r%'" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.replication_queue WHERE database = '$CLICKHOUSE_DATABASE' AND table like 'kill_mutation_r%'" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE kill_mutation_r1 DELETE WHERE toUInt32(s) = 1" @@ -57,6 +57,14 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM system.mutations WHERE database ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" +# Wait for the 1st mutation to be actually killed and the 2nd to finish +query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1) +while [ "$query_result" != "0" ] +do + query_result=$($CLICKHOUSE_CLIENT --query="$check_query1" 2>&1) + sleep 0.5 +done + ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r1" ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA kill_mutation_r2" diff --git a/tests/queries/0_stateless/00976_asof_join_on.reference b/tests/queries/0_stateless/00976_asof_join_on.reference index 4d1b1273363..433d896426c 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.reference +++ b/tests/queries/0_stateless/00976_asof_join_on.reference @@ -33,3 +33,4 @@ 1 3 1 4 2 1 2 3 2 2 2 3 +1 2 1 2 diff --git a/tests/queries/0_stateless/00976_asof_join_on.sql b/tests/queries/0_stateless/00976_asof_join_on.sql index 8060fb86831..afa125a9271 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.sql +++ b/tests/queries/0_stateless/00976_asof_join_on.sql @@ -23,5 +23,10 @@ SELECT count() FROM A ASOF JOIN B ON A.a == B.b AND A.t != B.t; -- { serverError SELECT A.a, A.t, B.b, B.t FROM A ASOF JOIN B ON A.a == B.b AND A.t < B.t OR A.a == B.b + 1 ORDER BY (A.a, A.t); -- { serverError 48 } +SELECT A.a, A.t, B.b, B.t FROM A +ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A.a == B.b +WHERE B.t != 3 ORDER BY (A.a, A.t) +; + DROP TABLE A; DROP TABLE B; diff --git a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh index 5f69427c0cd..79d2c736793 100755 --- a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh @@ -138,8 +138,13 @@ while true ; do done for i in $(seq $REPLICAS); do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_mutate_mt_$i" + $CLICKHOUSE_CLIENT --query "CHECK TABLE concurrent_mutate_mt_$i" &> /dev/null # if we will remove something the output of select will be wrong $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_mutate_mt_$i" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # all mutations have to be done $CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table='concurrent_mutate_mt_$i' and is_done=0" # for verbose output +done + +for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_mutate_mt_$i" done diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 4d341e5b8a3..cc63af3676b 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -8,7 +8,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function create_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do SHARD=$(($RANDOM % 2)) REPLICA=$(($RANDOM % 2)) SUFFIX=$(($RANDOM % 16)) @@ -24,7 +25,8 @@ function create_db() function drop_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [[ "$database" == "$CLICKHOUSE_DATABASE" ]]; then continue; fi if [ -z "$database" ]; then continue; fi @@ -36,7 +38,8 @@ function drop_db() function sync_db() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --receive_timeout=1 -q \ @@ -47,7 +50,8 @@ function sync_db() function create_table() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do database=$($CLICKHOUSE_CLIENT -q "select name from system.databases where name like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$database" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ @@ -59,7 +63,8 @@ function create_table() function alter_table() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \ @@ -71,7 +76,8 @@ function alter_table() function insert() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do table=$($CLICKHOUSE_CLIENT -q "select database || '.' || name from system.tables where database like '${CLICKHOUSE_DATABASE}%' order by rand() limit 1") if [ -z "$table" ]; then continue; fi $CLICKHOUSE_CLIENT -q \ @@ -81,23 +87,16 @@ function insert() -export -f create_db -export -f drop_db -export -f sync_db -export -f create_table -export -f alter_table -export -f insert - TIMEOUT=30 -timeout $TIMEOUT bash -c create_db & -timeout $TIMEOUT bash -c sync_db & -timeout $TIMEOUT bash -c create_table & -timeout $TIMEOUT bash -c alter_table & -timeout $TIMEOUT bash -c insert & +create_db $TIMEOUT & +sync_db $TIMEOUT & +create_table $TIMEOUT & +alter_table $TIMEOUT & +insert $TIMEOUT & sleep 1 # give other queries a head start -timeout $TIMEOUT bash -c drop_db & +drop_db $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01410_full_join_and_null_predicates.reference b/tests/queries/0_stateless/01410_full_join_and_null_predicates.reference new file mode 100644 index 00000000000..785d581c685 --- /dev/null +++ b/tests/queries/0_stateless/01410_full_join_and_null_predicates.reference @@ -0,0 +1,12 @@ +select 1 +\N 1232 Johny +select 2 +\N 1232 Johny +select 3 +\N 1232 Johny +select 4 +\N 1232 Johny +select 5 +\N 1232 Johny +select 6 +\N 1232 Johny diff --git a/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql b/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql new file mode 100644 index 00000000000..f7d5fa67b1d --- /dev/null +++ b/tests/queries/0_stateless/01410_full_join_and_null_predicates.sql @@ -0,0 +1,58 @@ +drop table if EXISTS l; +drop table if EXISTS r; + +CREATE TABLE l (luid Nullable(Int16), name String) +ENGINE=MergeTree order by luid settings allow_nullable_key=1 as +select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---')); + +CREATE TABLE r (ruid Nullable(Int16), name String) +ENGINE=MergeTree order by ruid settings allow_nullable_key=1 as +select * from VALUES ((1231, 'John'),(1232, 'Johny')); + +select 'select 1'; +SELECT * FROM l full outer join r on l.luid = r.ruid +where luid is null + and ruid is not null; + +select 'select 2'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid) + where luid is null + and ruid is not null; + +select 'select 3'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid +limit 100000000) + where luid is null + and ruid is not null; + +drop table l; +drop table r; + +CREATE TABLE l (luid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as +select * from VALUES ((1231, 'John'),(6666, 'Ksenia'),(Null, '---')); + +CREATE TABLE r (ruid Nullable(Int16), name String) ENGINE=MergeTree order by tuple() as +select * from VALUES ((1231, 'John'),(1232, 'Johny')); + +select 'select 4'; +SELECT * FROM l full outer join r on l.luid = r.ruid +where luid is null + and ruid is not null; + +select 'select 5'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid) + where luid is null + and ruid is not null; + +select 'select 6'; +select * from ( +SELECT * FROM l full outer join r on l.luid = r.ruid +limit 100000000) + where luid is null + and ruid is not null; + +drop table l; +drop table r; diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference index 6b762abd192..30b5ae9c648 100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.reference @@ -9,5 +9,4 @@ SELECT x3, x4 FROM prewhere_move -PREWHERE x1 > 100 -WHERE (x1 > 100) AND ((x2 > 100) AND (x3 > 100) AND (x4 > 100)) +PREWHERE (x1 > 100) AND (x2 > 100) AND (x3 > 100) AND (x4 > 100) diff --git a/tests/queries/0_stateless/01590_countSubstrings.reference b/tests/queries/0_stateless/01590_countSubstrings.reference index 056cb4c53b6..367b910e569 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.reference +++ b/tests/queries/0_stateless/01590_countSubstrings.reference @@ -7,6 +7,11 @@ empty 0 0 0 +0 +0 +0 +0 +0 char 1 2 diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index dd2122ed6ff..b38cbb7d188 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -12,6 +12,11 @@ select 'empty'; select countSubstrings('', '.'); select countSubstrings('', ''); select countSubstrings('.', ''); +select countSubstrings(toString(number), '') from numbers(1); +select countSubstrings('', toString(number)) from numbers(1); +select countSubstrings('aaa', materialize('')); +select countSubstrings(materialize('aaa'), ''); +select countSubstrings(materialize('aaa'), materialize('')); select 'char'; select countSubstrings('foobar.com', '.'); diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index c9aa2c7d82e..48558d79f93 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -13,7 +13,7 @@ cd $CLICKHOUSE_TMP || exit # Protection for network errors for _ in {1..10}; do rm -rf ./clickhouse-odbc - git clone --quiet https://github.com/ClickHouse/clickhouse-odbc.git && pushd clickhouse-odbc > /dev/null && git checkout --quiet 5d84ec591c53cbb272593f024230a052690fdf69 && break + git clone --quiet https://github.com/ClickHouse/clickhouse-odbc.git && pushd clickhouse-odbc 2> /dev/null > /dev/null && git checkout --quiet 5d84ec591c53cbb272593f024230a052690fdf69 && break sleep 1 done diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference new file mode 100644 index 00000000000..06e9efbe839 --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.reference @@ -0,0 +1,5 @@ +1 +1 +-1 +1 +-1 diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql new file mode 100644 index 00000000000..51dafb07b91 --- /dev/null +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_monotonic_key_expr.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY (); + +INSERT INTO t0(c0) VALUES (1); + +SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0) SETTINGS optimize_use_implicit_projections = 1; + +create table t1 (c0 Int32) engine = MergeTree order by sin(c0); +insert into t1 values (-1), (1); +select c0 from t1 order by sin(-c0) settings optimize_read_in_order=0; +select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1; + +DROP TABLE t0; +DROP TABLE t1; diff --git a/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql new file mode 100644 index 00000000000..30306ec5637 --- /dev/null +++ b/tests/queries/0_stateless/01710_normal_projection_with_query_plan_optimization.sql @@ -0,0 +1,11 @@ +drop table if exists t; + +CREATE TABLE t (id UInt64, id2 UInt64, id3 UInt64, PROJECTION t_reverse (SELECT id, id2, id3 ORDER BY id2, id, id3)) ENGINE = MergeTree ORDER BY (id) settings index_granularity = 4; + +insert into t SELECT number, -number, number FROM numbers(10000); + +set max_rows_to_read = 4; + +select count() from t where id = 3; + +drop table t; diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.reference b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference new file mode 100644 index 00000000000..9c2e9df6662 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference @@ -0,0 +1,3 @@ +t.t_normal +t.t_agg +t._minmax_count_projection diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql new file mode 100644 index 00000000000..cd84b392fe5 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql @@ -0,0 +1,66 @@ +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; +set optimize_use_implicit_projections=1; + +DROP TABLE IF EXISTS t; + +CREATE TABLE t +( + `id` UInt64, + `id2` UInt64, + `id3` UInt64, + PROJECTION t_normal + ( + SELECT + id, + id2, + id3 + ORDER BY + id2, + id, + id3 + ), + PROJECTION t_agg + ( + SELECT + sum(id3) + GROUP BY id2 + ) +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 8; + +insert into t SELECT number, -number, number FROM numbers(10000); + +SELECT * FROM t WHERE id2 = 3 FORMAT Null; +SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null; +SELECT min(id) FROM t FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT * FROM t WHERE id2 = 3 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;'; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 index e9f32087439..ae43aa7195c 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.reference.j2 @@ -1,7 +1,6 @@ {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} === {{ join_algorithm }} === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -17,7 +16,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -30,7 +28,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 0 196 -3 0 197 -2 0 198 @@ -41,7 +38,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -49,7 +45,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -65,7 +60,6 @@ 8 8 0 9 9 0 10 10 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -78,7 +72,6 @@ 9 9 0 10 10 0 = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 0 0 -3 0 0 -2 @@ -89,7 +82,6 @@ 3 3 3 4 4 4 5 5 5 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -98,7 +90,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -114,7 +105,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -127,7 +117,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -138,7 +127,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -146,7 +134,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -162,7 +149,6 @@ 8 108 0 \N 9 109 0 \N 10 110 0 \N -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -175,7 +161,6 @@ 9 109 0 \N 10 110 0 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 0 0 -4 196 0 0 -3 197 0 0 -2 198 @@ -186,7 +171,6 @@ 3 103 3 203 4 104 4 204 5 105 5 205 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -196,7 +180,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -205,13 +188,11 @@ 1 55 1055 0 0 -10 0 990 1 55 15 1055 1015 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -219,11 +200,9 @@ 1 1 1 -{% endif -%} {% if join_algorithm not in ['full_sorting_merge'] -%} === join use nulls === = full = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -239,7 +218,6 @@ 8 108 \N 9 109 \N 10 110 \N -{% endif -%} = left = 1 101 201 2 102 202 @@ -252,7 +230,6 @@ 9 109 \N 10 110 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} -4 \N 196 -3 \N 197 -2 \N 198 @@ -263,7 +240,6 @@ 3 103 203 4 104 204 5 105 205 -{% endif -%} = inner = 1 101 201 2 102 202 @@ -271,7 +247,6 @@ 4 104 204 5 105 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -287,7 +262,6 @@ \N \N -2 \N \N -1 \N \N 0 -{% endif -%} = left = 1 1 1 2 2 2 @@ -300,7 +274,6 @@ 9 9 \N 10 10 \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 2 2 2 3 3 3 @@ -311,7 +284,6 @@ \N \N -2 \N \N -1 \N \N 0 -{% endif -%} = inner = 1 1 1 2 2 2 @@ -320,7 +292,6 @@ 5 5 5 = join on = = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -336,7 +307,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -349,7 +319,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -360,7 +329,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -368,7 +336,6 @@ 4 104 4 204 5 105 5 205 = full = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -384,7 +351,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = left = 1 101 1 201 2 102 2 202 @@ -397,7 +363,6 @@ 9 109 \N \N 10 110 \N \N = right = -{% if join_algorithm not in ['grace_hash'] -%} 1 101 1 201 2 102 2 202 3 103 3 203 @@ -408,7 +373,6 @@ \N \N -2 198 \N \N -1 199 \N \N 0 200 -{% endif -%} = inner = 1 101 1 201 2 102 2 202 @@ -418,7 +382,6 @@ = agg = 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -427,13 +390,11 @@ 1 55 1055 1 55 15 1055 1015 \N \N -10 \N 990 -{% endif -%} = types = 1 1 1 1 -{% if join_algorithm not in ['grace_hash'] -%} 1 1 1 @@ -442,5 +403,4 @@ 1 1 {% endif -%} -{% endif -%} {% endfor -%} diff --git a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 index f5321939f28..38f71f4c5ec 100644 --- a/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 +++ b/tests/queries/0_stateless/01721_join_implicit_cast_long.sql.j2 @@ -10,7 +10,6 @@ INSERT INTO t1 SELECT number as a, 100 + number as b FROM system.numbers LIMIT 1 INSERT INTO t2 SELECT number - 5 as a, 200 + number - 5 as b FROM system.numbers LIMIT 1, 10; {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['hash', 'partial_merge', 'auto', 'full_sorting_merge', 'grace_hash'] -%} diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 3a4b380de65..766dff8c7b0 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -33,3 +33,8 @@ insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; 100000 drop table XXXX; +CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; +INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); +SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1; +0 +drop table XXXX; diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index e1e66c630e1..77c2760535d 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -33,3 +33,11 @@ insert into XXXX select number*60, 0 from numbers(100000); SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1; drop table XXXX; + +CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1; + +INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3); + +SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1; + +drop table XXXX; diff --git a/tests/queries/0_stateless/01778_mmap_cache_infra.reference b/tests/queries/0_stateless/01778_mmap_cache_infra.reference index ed365028ecc..0e82b277bc1 100644 --- a/tests/queries/0_stateless/01778_mmap_cache_infra.reference +++ b/tests/queries/0_stateless/01778_mmap_cache_infra.reference @@ -2,7 +2,5 @@ CreatedReadBufferMMap CreatedReadBufferMMapFailed MMappedFileCacheHits MMappedFileCacheMisses -MMappedAllocBytes -MMappedAllocs MMappedFileBytes MMappedFiles diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 8d3954484dd..794acc310ce 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -1,85 +1,83 @@ - ReadFromMergeTree (default.test_index) - Indexes: - MinMax - Keys: - y - Condition: (y in [1, +Inf)) - Parts: 4/5 - Granules: 11/12 - Partition - Keys: - y - bitAnd(z, 3) - Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])) - Parts: 3/4 - Granules: 10/11 - PrimaryKey - Keys: - x - y - Condition: and((x in [11, +Inf)), (y in [1, +Inf))) - Parts: 2/3 - Granules: 6/10 - Skip - Name: t_minmax - Description: minmax GRANULARITY 2 - Parts: 1/2 - Granules: 4/6 - Skip - Name: t_set - Description: set GRANULARITY 2 - Parts: 1/1 - Granules: 2/4 + ReadFromMergeTree (default.test_index) + Indexes: + MinMax + Keys: + y + Condition: (y in [1, +Inf)) + Parts: 4/5 + Granules: 11/12 + Partition + Keys: + y + bitAnd(z, 3) + Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])) + Parts: 3/4 + Granules: 10/11 + PrimaryKey + Keys: + x + y + Condition: and((x in [11, +Inf)), (y in [1, +Inf))) + Parts: 2/3 + Granules: 6/10 + Skip + Name: t_minmax + Description: minmax GRANULARITY 2 + Parts: 1/2 + Granules: 4/6 + Skip + Name: t_set + Description: set GRANULARITY 2 + Parts: 1/1 + Granules: 2/4 ----------------- - "Node Type": "ReadFromMergeTree", - "Description": "default.test_index", - "Indexes": [ - { - "Type": "MinMax", - "Keys": ["y"], - "Condition": "(y in [1, +Inf))", - "Initial Parts": 5, - "Selected Parts": 4, - "Initial Granules": 12, - "Selected Granules": 11 - }, - { - "Type": "Partition", - "Keys": ["y", "bitAnd(z, 3)"], - "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))", - "Initial Parts": 4, - "Selected Parts": 3, - "Initial Granules": 11, - "Selected Granules": 10 - }, - { - "Type": "PrimaryKey", - "Keys": ["x", "y"], - "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))", - "Initial Parts": 3, - "Selected Parts": 2, - "Initial Granules": 10, - "Selected Granules": 6 - }, - { - "Type": "Skip", - "Name": "t_minmax", - "Description": "minmax GRANULARITY 2", - "Initial Parts": 2, - "Selected Parts": 1, - "Initial Granules": 6, - "Selected Granules": 4 - }, - { - "Type": "Skip", - "Name": "t_set", - "Description": "set GRANULARITY 2", - "Initial Parts": 1, - "Selected Parts": 1, - "Initial Granules": 4, - "Selected Granules": 2 - } - ] + "Node Type": "ReadFromMergeTree", + "Description": "default.test_index", + "Indexes": [ + { + "Type": "MinMax", + "Keys": ["y"], + "Condition": "(y in [1, +Inf))", + "Initial Parts": 5, + "Selected Parts": 4, + "Initial Granules": 12, + "Selected Granules": 11 + }, + { + "Type": "Partition", + "Keys": ["y", "bitAnd(z, 3)"], + "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))", + "Initial Parts": 4, + "Selected Parts": 3, + "Initial Granules": 11, + "Selected Granules": 10 + }, + { + "Type": "PrimaryKey", + "Keys": ["x", "y"], + "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))", + "Initial Parts": 3, + "Selected Parts": 2, + "Initial Granules": 10, + "Selected Granules": 6 + }, + { + "Type": "Skip", + "Name": "t_minmax", + "Description": "minmax GRANULARITY 2", + "Initial Parts": 2, + "Selected Parts": 1, + "Initial Granules": 6, + "Selected Granules": 4 + }, + { + "Type": "Skip", + "Name": "t_set", + "Description": "set GRANULARITY 2", + "Initial Parts": 1, + "Selected Parts": 1, + "Initial Granules": 4, + "Selected Granules": 2 } ] } diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference index adce19321d5..686a864f222 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.reference @@ -3,12 +3,10 @@ 35 SELECT count() FROM t_move_to_prewhere -PREWHERE a AND b AND c -WHERE (a AND b AND c) AND (NOT ignore(fat_string)) +PREWHERE a AND b AND c AND (NOT ignore(fat_string)) 1 Compact 2 Compact 35 SELECT count() FROM t_move_to_prewhere -PREWHERE a -WHERE a AND (b AND c AND (NOT ignore(fat_string))) +PREWHERE a AND b AND c AND (NOT ignore(fat_string)) diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.sh b/tests/queries/0_stateless/01825_type_json_ghdata.sh index bdb439f756f..cea02131d86 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1 cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject" diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 487c95137ae..498e1db1f69 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -9,9 +9,9 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_from_string" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple()" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1 cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString" diff --git a/tests/queries/0_stateless/01849_geoToS2.reference b/tests/queries/0_stateless/01849_geoToS2.reference index 08d76978791..a8196994361 100644 --- a/tests/queries/0_stateless/01849_geoToS2.reference +++ b/tests/queries/0_stateless/01849_geoToS2.reference @@ -39,4 +39,3 @@ Checking s2 index generation. (74.0061,-68.32124) (74.0061,-68.32124) ok (10.61077,-64.1841) (10.61077,-64.1841) ok (-89.81096,-57.01398) (-89.81096,-57.01398) ok -4864204703484167331 diff --git a/tests/queries/0_stateless/01849_geoToS2.sql b/tests/queries/0_stateless/01849_geoToS2.sql index abd084a2b19..e997fec14e5 100644 --- a/tests/queries/0_stateless/01849_geoToS2.sql +++ b/tests/queries/0_stateless/01849_geoToS2.sql @@ -44,7 +44,7 @@ SELECT first, second, result FROM ( SELECT s2ToGeo(toUInt64(-1)); -- { serverError 36 } SELECT s2ToGeo(nan); -- { serverError 43 } -SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); +SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); -- { serverError BAD_ARGUMENTS } SELECT geoToS2(nan, nan); -- { serverError 43 } SELECT geoToS2(-inf, 1.1754943508222875e-38); -- { serverError 43 } diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.reference b/tests/queries/0_stateless/01917_prewhere_column_type.reference index 58c9bdf9d01..99c26d55eda 100644 --- a/tests/queries/0_stateless/01917_prewhere_column_type.reference +++ b/tests/queries/0_stateless/01917_prewhere_column_type.reference @@ -1 +1,5 @@ 111 +111 +111 +111 +111 diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.sql b/tests/queries/0_stateless/01917_prewhere_column_type.sql index c0bc0c3e36b..9ce87ab548c 100644 --- a/tests/queries/0_stateless/01917_prewhere_column_type.sql +++ b/tests/queries/0_stateless/01917_prewhere_column_type.sql @@ -6,7 +6,10 @@ CREATE TABLE t1 ( s String, f Float32, e UInt16 ) ENGINE = MergeTree ORDER BY tu INSERT INTO t1 VALUES ('111', 1, 1); -SELECT s FROM t1 WHERE f AND (e = 1); -- { serverError 59 } +SELECT s FROM t1 WHERE f AND (e = 1); +SELECT s FROM t1 WHERE f AND (e = 1) SETTINGS optimize_move_to_prewhere=true; +SELECT s FROM t1 WHERE f AND (e = 1) SETTINGS optimize_move_to_prewhere=false; +SELECT s FROM t1 PREWHERE f AND (e = 1); SELECT s FROM t1 PREWHERE f; -- { serverError 59 } SELECT s FROM t1 PREWHERE f WHERE (e = 1); -- { serverError 59 } SELECT s FROM t1 PREWHERE f WHERE f AND (e = 1); -- { serverError 59 } diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 3b3186f8cc0..e08a581f573 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -517,7 +517,7 @@ CREATE TABLE system.parts `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, - `last_removal_attemp_time` DateTime, + `last_removal_attempt_time` DateTime, `removal_state` String, `bytes` UInt64, `marks_size` UInt64, diff --git a/tests/queries/0_stateless/02122_parallel_formatting.lib b/tests/queries/0_stateless/02122_parallel_formatting.lib index 56119012788..5175e004cc5 100755 --- a/tests/queries/0_stateless/02122_parallel_formatting.lib +++ b/tests/queries/0_stateless/02122_parallel_formatting.lib @@ -11,14 +11,14 @@ non_parallel_file=$CLICKHOUSE_TMP/$CLICKHOUSE_TEST_UNIQUE_NAME"_non_parallel" format=$1 echo $format-1 -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_write_statistics=0 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_write_statistics=0 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file echo $format-2 -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file -$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_write_statistics=0 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file +$CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals order by number limit 190000 format $format" --extremes=1 --output_format_write_statistics=0 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index 5b779c1b276..819ac8c9524 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan +# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 2dc83f1eaa5..74ba452d783 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,7 +1,6 @@ SELECT count() FROM t_02156_merge1 -PREWHERE k = 3 -WHERE (k = 3) AND notEmpty(v) +PREWHERE notEmpty(v) AND (k = 3) 2 SELECT count() FROM t_02156_merge2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index b75d3fa22e5..83d88a68d9b 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -1,4 +1,5 @@ SET optimize_move_to_prewhere = 1; +SET enable_multiple_prewhere_read_steps = 1; DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; @@ -8,8 +9,8 @@ DROP TABLE IF EXISTS t_02156_merge1; DROP TABLE IF EXISTS t_02156_merge2; DROP TABLE IF EXISTS t_02156_merge3; -CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k; -CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k; +CREATE TABLE t_02156_mt1 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; +CREATE TABLE t_02156_mt2 (k UInt32, v String) ENGINE = MergeTree ORDER BY k SETTINGS min_bytes_for_wide_part=0; CREATE TABLE t_02156_log (k UInt32, v String) ENGINE = Log; CREATE TABLE t_02156_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_mt1); diff --git a/tests/queries/0_stateless/02224_s2_test_const_columns.reference b/tests/queries/0_stateless/02224_s2_test_const_columns.reference index 9982596f097..20f32ec6be7 100644 --- a/tests/queries/0_stateless/02224_s2_test_const_columns.reference +++ b/tests/queries/0_stateless/02224_s2_test_const_columns.reference @@ -16,4 +16,4 @@ (5179062030687166815,5177056748191934217) (5179062030687166815,5177057445452335297) (5178914411069187297,5177056748191934217) -(6304347505408739331,8070450532247928833) +(5178914411069187297,5177912432982045463) diff --git a/tests/queries/0_stateless/02224_s2_test_const_columns.sql b/tests/queries/0_stateless/02224_s2_test_const_columns.sql index f33a7f2b696..1d3e51065b5 100644 --- a/tests/queries/0_stateless/02224_s2_test_const_columns.sql +++ b/tests/queries/0_stateless/02224_s2_test_const_columns.sql @@ -9,4 +9,4 @@ SELECT s2CapUnion(3814912406305146967, toFloat64(1), 1157347770437378819, toFloa SELECT s2RectAdd(5178914411069187297, 5177056748191934217, arrayJoin([5179056748191934217,5177914411069187297])); SELECT s2RectContains(5179062030687166815, 5177056748191934217, arrayJoin([5177914411069187297, 5177914411069187297])); SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217, 5177914411069187297])); -SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,1157347770437378819])); +SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, arrayJoin([5177056748191934217,5177914411069187297])); diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference index 997105c9da3..eb6c9305ebd 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -3,7 +3,20 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threashold = 1, + bypass_cache_threashold = 100, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql index f6671b82291..8871f8655dd 100644 --- a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -6,7 +6,21 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + enable_bypass_cache_with_threashold = 1, + bypass_cache_threashold = 100, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); + INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 16c4cd1c049..26340c271e1 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -5,7 +5,20 @@ SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations= 1, + enable_filesystem_query_cache_limit = 1, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 44856a2188c..d85b3f543e1 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -8,7 +8,20 @@ SET skip_download_if_exceeds_query_cache=1; SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; +CREATE TABLE test (key UInt32, value String) +Engine=MergeTree() +ORDER BY key +SETTINGS min_bytes_for_wide_part = 10485760, + compress_marks=false, + compress_primary_key=false, + disk = disk( + type = cache, + max_size = '128Mi', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations= 1, + enable_filesystem_query_cache_limit = 1, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index f960b4eb21c..93b6d4de94f 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -16,22 +16,6 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect no cache -Expect cache -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -2 -Expect no cache Using storage policy: local_cache 0 Expect cache @@ -50,19 +34,3 @@ DOWNLOADED 0 79 80 DOWNLOADED 0 745 746 2 Expect no cache -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect cache -DOWNLOADED 0 0 1 -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -3 -Expect no cache -Expect cache -DOWNLOADED 0 79 80 -DOWNLOADED 0 745 746 -2 -Expect no cache diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index c7dc9fbd961..6a94cffea5a 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -45,33 +45,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" - ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; - - echo 'Expect no cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - - echo 'Expect cache' - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" - ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; - - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - echo 'Expect no cache' - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 index 98bfd9d9b2b..0af4158e971 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.reference.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.reference.j2 @@ -1,7 +1,7 @@ {% set table_size = 15 -%} {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%} -- {{ join_algorithm }} -- -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 1, 4) -%} ALL INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -50,7 +50,6 @@ ALL LEFT | bs = {{ block_size }} 14 14 val9 0 14 14 val9 0 ALL RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -64,7 +63,6 @@ ALL RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -85,7 +83,6 @@ ALL LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -99,7 +96,6 @@ ALL RIGHT | bs = {{ block_size }} | copmosite key 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 -{% endif -%} ANY INNER USING | bs = {{ block_size }} 4 0 0 5 0 0 @@ -137,7 +133,6 @@ ANY LEFT | bs = {{ block_size }} 13 13 val13 0 14 14 val9 0 ANY RIGHT | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -150,7 +145,6 @@ ANY RIGHT | bs = {{ block_size }} 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | bs = {{ block_size }} | copmosite key @@ -170,7 +164,6 @@ ANY LEFT | bs = {{ block_size }} | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | bs = {{ block_size }} | copmosite key -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 1 1 1 1 val2 0 \N 0 1 1 1 1 val7 0 \N 0 1 1 2 1 val5 @@ -183,7 +176,6 @@ ANY RIGHT | bs = {{ block_size }} | copmosite key 0 \N 0 2 1 \N 1 val3 0 \N 0 2 2 \N 1 val9 2 2 2 2 2 2 0 val4 -{% endif -%} {% endfor -%} ALL INNER | join_use_nulls = 1 4 4 0 0 @@ -219,7 +211,6 @@ ALL LEFT | join_use_nulls = 1 14 14 val9 0 14 14 val9 0 ALL RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -233,7 +224,6 @@ ALL RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ALL INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 2 2 2 2 2 2 0 0 @@ -254,7 +244,6 @@ ALL LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ALL RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 @@ -268,7 +257,6 @@ ALL RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif -%} ANY INNER | join_use_nulls = 1 4 4 0 0 5 5 0 0 @@ -296,7 +284,6 @@ ANY LEFT | join_use_nulls = 1 13 13 val13 0 14 14 val9 0 ANY RIGHT | join_use_nulls = 1 -{% if join_algorithm != 'grace_hash' -%} 4 4 0 val10 5 5 0 val6 6 6 0 val8 @@ -309,7 +296,6 @@ ANY RIGHT | join_use_nulls = 1 13 13 0 val9 14 14 0 val3 14 14 0 val7 -{% endif -%} ANY INNER | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 0 0 ANY LEFT | join_use_nulls = 1 | copmosite key @@ -329,7 +315,6 @@ ANY LEFT | join_use_nulls = 1 | copmosite key 2 2 2 2 2 2 val12 0 2 2 2 2 2 2 val9 0 ANY RIGHT | join_use_nulls = 1 | copmosite key -{% if join_algorithm != 'grace_hash' -%} 2 2 2 2 2 2 0 val4 \N \N \N 1 1 1 \N val2 \N \N \N 1 1 1 \N val7 @@ -342,5 +327,4 @@ ANY RIGHT | join_use_nulls = 1 | copmosite key \N \N \N 2 1 2 \N val8 \N \N \N 2 1 \N \N val3 \N \N \N 2 2 \N \N val9 -{% endif -%} {% endfor -%} diff --git a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 index 43f7354017c..6b6aa53836e 100644 --- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2 +++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2 @@ -28,9 +28,7 @@ INSERT INTO t2 'val' || toString(number) as s FROM numbers_mt({{ table_size - 3 }}); - {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['default', 'full_sorting_merge', 'grace_hash'] -%} @@ -40,7 +38,7 @@ SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{% SELECT '-- {{ join_algorithm }} --'; SET join_algorithm = '{{ join_algorithm }}'; -{% for block_size in range(1, table_size + 1) -%} +{% for block_size in range(1, table_size + 1, 4) -%} {% for kind in ['ALL', 'ANY'] -%} SET max_block_size = {{ block_size }}; diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 index 2cc6c6e85d6..df968e86e8d 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.reference.j2 @@ -1,6 +1,6 @@ {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} --- {{ join_algorithm }} --- -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} t1 ALL INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -108,7 +108,6 @@ t1 ALL LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -161,7 +160,6 @@ t1 ALL RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val28 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 2 2 5 5 @@ -177,7 +175,6 @@ t1 ANY LEFT JOIN t2 | bs = {{ block_size }} 2 2 val27 5 3 3 val3 4 t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 2 2 5 val22 @@ -188,9 +185,7 @@ t1 ANY RIGHT JOIN t2 | bs = {{ block_size }} 2 2 5 val27 2 2 5 val28 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 2 2 5 5 @@ -243,9 +238,7 @@ t1 ALL FULL JOIN t2 | bs = {{ block_size }} 2 2 5 5 2 2 5 5 3 3 4 4 -{% endif -%} t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 5 5 @@ -298,7 +291,6 @@ t1 ALL FULL JOIN USING t2 | bs = {{ block_size }} 2 5 5 2 5 5 3 4 4 -{% endif -%} t1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -315,7 +307,6 @@ t1 ALL LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -326,7 +317,6 @@ t1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -341,7 +331,6 @@ t1 ANY LEFT JOIN tn2 | bs = {{ block_size }} 2 \N val27 0 3 3 val3 4 t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 val22 0 \N 0 val23 0 \N 0 val24 @@ -352,9 +341,7 @@ t1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} 1 1 4 val11 1 1 4 val12 3 3 4 val3 -{% endif -%} t1 ALL FULL JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 0 \N 0 5 0 \N 0 5 0 \N 0 5 @@ -372,9 +359,8 @@ t1 ALL FULL JOIN tn2 | bs = {{ block_size }} 2 \N 5 0 2 \N 5 0 3 3 4 4 -{% endif -%} -t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +t1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 2 5 0 @@ -409,7 +395,6 @@ tn1 ALL LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -420,7 +405,6 @@ tn1 ALL RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ANY INNER JOIN t2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -435,7 +419,6 @@ tn1 ANY LEFT JOIN t2 | bs = {{ block_size }} \N 0 val26 0 \N 0 val27 0 tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -446,9 +429,7 @@ tn1 ANY RIGHT JOIN t2 | bs = {{ block_size }} \N 2 0 val26 \N 2 0 val27 \N 2 0 val28 -{% endif -%} tn1 ALL FULL JOIN t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -466,9 +447,7 @@ tn1 ALL FULL JOIN t2 | bs = {{ block_size }} \N 2 0 5 \N 2 0 5 \N 2 0 5 -{% endif -%} tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 4 5 1 4 5 2 0 5 @@ -486,7 +465,6 @@ tn1 ALL FULL JOIN USING t2 | bs = {{ block_size }} \N 5 0 \N 5 0 \N 5 0 -{% endif -%} tn1 ALL INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 1 1 4 5 @@ -503,7 +481,6 @@ tn1 ALL LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -514,7 +491,6 @@ tn1 ALL RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ANY INNER JOIN tn2 | bs = {{ block_size }} 1 1 4 5 3 3 4 4 @@ -529,7 +505,6 @@ tn1 ANY LEFT JOIN tn2 | bs = {{ block_size }} \N \N val26 0 \N \N val27 0 tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 val11 1 1 4 val12 3 3 4 val3 @@ -540,9 +515,7 @@ tn1 ANY RIGHT JOIN tn2 | bs = {{ block_size }} \N \N 0 val26 \N \N 0 val27 \N \N 0 val28 -{% endif -%} tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} -{% if join_algorithm != 'grace_hash' -%} 1 1 4 5 1 1 4 5 3 3 4 4 @@ -560,9 +533,8 @@ tn1 ALL FULL JOIN tn2 | bs = {{ block_size }} \N \N 5 0 \N \N 5 0 \N \N 5 0 -{% endif -%} -tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} {% if join_algorithm != 'grace_hash' -%} +tn1 ALL FULL JOIN USING tn2 | bs = {{ block_size }} 1 4 5 1 4 5 3 4 4 diff --git a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 index 613da65421e..f8eb4b1a53e 100644 --- a/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 +++ b/tests/queries/0_stateless/02274_full_sort_join_nodistinct.sql.j2 @@ -16,7 +16,6 @@ INSERT INTO t2 VALUES (1, 'val11'), (1, 'val12'), (2, 'val22'), (2, 'val23'), (2 INSERT INTO tn2 VALUES (1, 'val11'), (1, 'val12'), (NULL, 'val22'), (NULL, 'val23'), (NULL, 'val24'), (NULL, 'val25'), (NULL, 'val26'), (NULL, 'val27'), (NULL, 'val28'), (3, 'val3'); {% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } {% endif %} {% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -27,7 +26,7 @@ SET join_algorithm = '{{ join_algorithm }}'; SELECT '--- {{ join_algorithm }} ---'; -{% for block_size in range(1, 11) -%} +{% for block_size in range(1, 11, 4) -%} SET max_block_size = {{ block_size }}; {% for t1, t2 in [('t1', 't2'), ('t1', 'tn2'), ('tn1', 't2'), ('tn1', 'tn2')] -%} @@ -47,9 +46,10 @@ SELECT t1.key, t2.key, length(t1.s), t2.s FROM {{ t1 }} AS t1 {{ kind }} RIGHT J SELECT '{{ t1 }} ALL FULL JOIN {{ t2 }} | bs = {{ block_size }}'; SELECT t1.key, t2.key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 {{ kind }} FULL JOIN {{ t2 }} AS t2 ON t1.key == t2.key ORDER BY t1.key, t2.key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} +{% if join_algorithm == 'full_sorting_merge' or t2 != 'tn2' -%} SELECT '{{ t1 }} ALL FULL JOIN USING {{ t2 }} | bs = {{ block_size }}'; SELECT key, length(t1.s), length(t2.s) FROM {{ t1 }} AS t1 ALL FULL JOIN {{ t2 }} AS t2 USING (key) ORDER BY key, length(t1.s), length(t2.s); {{ is_implemented(join_algorithm) }} - +{% endif -%} {% endfor -%} {% endfor -%} SET max_bytes_in_join = 0; diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.reference b/tests/queries/0_stateless/02275_full_sort_join_long.reference index 9ec06aea3e6..73482358d12 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.reference +++ b/tests/queries/0_stateless/02275_full_sort_join_long.reference @@ -41,16 +41,34 @@ ALL INNER ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 ALL INNER 500353531835 500353531835 1000342 1000342 1000342 ALL LEFT 50195752660639 500353531835 10369589 10369589 1000342 ALL RIGHT -skipped +500353531835 684008812186 1367170 1000342 1367170 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 +ANY INNER +199622811843 199622811843 399458 399458 399458 +ANY LEFT +50010619420459 315220291655 10000000 10000000 630753 +ANY RIGHT +316611844056 500267124407 1000000 633172 1000000 diff --git a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 index 7276e77dc16..621352f9c25 100644 --- a/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 +++ b/tests/queries/0_stateless/02275_full_sort_join_long.sql.j2 @@ -22,11 +22,6 @@ INSERT INTO t2 FROM numbers_mt({{ rtable_size }}) ; -{% macro is_implemented(join_algorithm) -%} -{% if join_algorithm == 'grace_hash' %} -- { serverError NOT_IMPLEMENTED } -SELECT 'skipped'; -{% endif -%} -{% endmacro -%} {% for join_algorithm in ['full_sorting_merge', 'grace_hash'] -%} @@ -40,7 +35,6 @@ SET join_algorithm = '{{ join_algorithm }}'; SET max_block_size = {{ block_size }}; -{% if not (kind == 'ANY' and join_algorithm == 'grace_hash') -%} SELECT '{{ kind }} INNER'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 @@ -58,9 +52,8 @@ SELECT '{{ kind }} RIGHT'; SELECT sum(t1.key), sum(t2.key), count(), countIf(t1.key != 0), countIf(t2.key != 0) FROM t1 {{ kind }} RIGHT JOIN t2 ON t1.key == t2.key -; {{ is_implemented(join_algorithm) }} +; -{% endif -%} {% endfor -%} {% endfor -%} diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index 62907a7c81c..b4e5b6715de 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -7,8 +7,6 @@ Using storage policy: s3_cache 1 1 0 -2 -0 Using storage policy: local_cache 0 2 @@ -18,5 +16,3 @@ Using storage policy: local_cache 1 1 0 -2 -0 diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index a6fa0457078..1e1841862e9 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -67,18 +67,4 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do ON data_paths.cache_path = caches.cache_path" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" - - $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_022862 (key UInt32, value String) - Engine=MergeTree() - ORDER BY key - SETTINGS storage_policy='${STORAGE_POLICY}_2', min_bytes_for_wide_part = 10485760" - - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_022862 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_022862 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE '${STORAGE_POLICY}_2'" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" - - $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_022862" done diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference deleted file mode 100644 index dfc78349c05..00000000000 --- a/tests/queries/0_stateless/02293_hashid.reference +++ /dev/null @@ -1,15 +0,0 @@ -0 gY -1 jR -2 k5 -3 l5 -4 mO -0 pbgkmdljlpjoapne -1 akemglnjepjpodba -2 obmgndljgajpkeao -3 dldokmpjpgjgeanb -4 nkdlpgajngjnobme -YQrvD5XGvbx -Bm3zaOq7zbp -oV -oV -6b diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql deleted file mode 100644 index 06af0b5e1d8..00000000000 --- a/tests/queries/0_stateless/02293_hashid.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Tags: no-upgrade-check -SET allow_experimental_hash_functions = 1; - -select number, hashid(number) from system.numbers limit 5; -select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; -select hashid(1234567890123456, 's3cr3t'); -select hashid(1234567890123456, 's3cr3t2'); - -SELECT hashid(1, hashid(2)); -SELECT hashid(1, 'k5'); -SELECT hashid(1, 'k5_othersalt'); - --- https://github.com/ClickHouse/ClickHouse/issues/39672 -SELECT - JSONExtractRaw(257, NULL), - hashid(1024, if(rand() % 10, 'truetruetruetrue', NULL), 's3\0r3t'); -- {serverError 43} diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index f1d0240d80e..da84cdabf79 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1 @@ 134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 100 2 0 -134217728 10000000 104857600 4194304 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 100 2 0 diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index a687ad01394..9c5c5c10952 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,7 +1,4 @@ -- Tags: no-fasttest, no-parallel SYSTEM DROP FILESYSTEM CACHE 's3_cache'; -SYSTEM DROP FILESYSTEM CACHE 's3_cache_2'; - DESCRIBE FILESYSTEM CACHE 's3_cache'; -DESCRIBE FILESYSTEM CACHE 's3_cache_2'; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index fc00bfdadca..595ebb483d5 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -346,7 +346,6 @@ hasAny hasColumnInTable hasSubstr hasThreadFuzzer -hashid hex hiveHash hop diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 9fd82da1038..ca83253eaf8 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -39,7 +39,7 @@ client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") # generate data and push to queue -def generate_data(q, total_number): +def generate_data(q, total_number, use_token): old_data = [] max_chunk_size = 30 partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"] @@ -63,12 +63,16 @@ def generate_data(q, total_number): end = start + chunk_size if end > total_number: end = total_number + + token = "" for i in range(start, end + 1): partition = partitions[random.randint(0, 1)] insert_stmt += "('{}', {}),".format(partition, i) + if use_token: + token = str(i) insert_stmt = insert_stmt[:-1] - q.put(insert_stmt) - old_data.append(insert_stmt) + q.put((insert_stmt, token)) + old_data.append((insert_stmt, token)) last_number = end if end >= total_number: break @@ -80,13 +84,14 @@ def fetch_and_insert_data(q, client): while True: insert = q.get() client.query( - insert, + insert[0], settings={ "async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0, + "insert_deduplication_token": insert[1], }, ) q.task_done() @@ -110,7 +115,11 @@ ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 q = queue.Queue(100) total_number = 10000 -gen = Thread(target=generate_data, args=[q, total_number]) +use_token = False +if sys.argv[-1] == "token": + use_token = True + +gen = Thread(target=generate_data, args=[q, total_number, use_token]) gen.start() for i in range(3): diff --git a/tests/queries/0_stateless/02481_async_insert_dedup_token.reference b/tests/queries/0_stateless/02481_async_insert_dedup_token.reference new file mode 100644 index 00000000000..a91c59a7fc5 --- /dev/null +++ b/tests/queries/0_stateless/02481_async_insert_dedup_token.reference @@ -0,0 +1,3 @@ +5 + +10000 diff --git a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh new file mode 100755 index 00000000000..8ef6eecda24 --- /dev/null +++ b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper, no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test +python3 "$CURDIR"/02481_async_insert_dedup.python token diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference index dd6341262bc..306374eed4b 100644 --- a/tests/queries/0_stateless/02494_query_cache_secrets.reference +++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference @@ -1,2 +1,2 @@ A2193552DCF8A9F99AC35F86BC4D2FFD -SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1 diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index 3ec729079b8..91f6a9bb541 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -10,7 +10,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function thread1() { thread_id=$1 - while true; do + local TIMELIMIT=$((SECONDS+$2)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do query_id="02497_$CLICKHOUSE_DATABASE-$RANDOM-$thread_id" $CLICKHOUSE_CLIENT --query_id=$query_id --query " SELECT count() FROM numbers_mt(100000) SETTINGS @@ -25,7 +26,8 @@ function thread1() function thread2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" done } @@ -35,29 +37,12 @@ export -f thread2 TIMEOUT=10 -timeout $TIMEOUT bash -c "thread1 0" >/dev/null & -timeout $TIMEOUT bash -c "thread1 1" >/dev/null & -timeout $TIMEOUT bash -c "thread1 2" >/dev/null & -timeout $TIMEOUT bash -c "thread1 3" >/dev/null & -timeout $TIMEOUT bash -c thread2 >/dev/null & +thread1 0 $TIMEOUT >/dev/null & +thread1 1 $TIMEOUT >/dev/null & +thread1 2 $TIMEOUT >/dev/null & +thread1 3 $TIMEOUT >/dev/null & +thread2 $TIMEOUT >/dev/null & wait -for _ in {1..10} -do - $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%' SYNC" >/dev/null - - # After this moment, the server can still run another query. - # For example, the 'timeout' command killed all threads of thread1, - # and the 'timeout' itself has finished, and we have successfully 'wait'-ed for it, - # but just before that, one of the threads successfully sent a query to the server, - # but the server didn't start to run this query yet, - # and even when the KILL QUERY was run, the query from the thread didn't start, - # but only started after the KILL QUERY has been already processed. - - # That's why we have to run this in a loop. - - $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break - - sleep 1 -done +$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' \ No newline at end of file diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index ed66c36b823..229c68bf8ec 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -13,7 +13,21 @@ function random { ${CLICKHOUSE_CLIENT} --multiline --multiquery -q " drop table if exists ttt; -create table ttt (id Int32, value String) engine=MergeTree() order by tuple() settings storage_policy='s3_cache_small_segment_size', min_bytes_for_wide_part=0; + +CREATE TABLE ttt (id Int32, value String) +Engine=MergeTree() +ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, + disk = disk( + type = cache, + max_size = '128Mi', + max_file_segment_size = '10Ki', + path = '/var/lib/clickhouse/${CLICKHOUSE_TEST_UNIQUE_NAME}_cache', + cache_on_write_operations = 1, + enable_filesystem_query_cache_limit = 1, + delayed_cleanup_interval_ms = 100, + disk = 's3_disk'); + insert into ttt select number, toString(number) from numbers(100000) settings throw_on_error_from_cache_on_write_operations = 1; " diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key.sql b/tests/queries/0_stateless/02540_duplicate_primary_key.sql index a084d76964b..6905c9d5133 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key.sql +++ b/tests/queries/0_stateless/02540_duplicate_primary_key.sql @@ -86,20 +86,4 @@ CREATE TABLE test ) ENGINE = MergeTree PARTITION BY toYYYYMM(coverage) -ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); - -insert into test select * from generateRandom() limit 10; - -with dissonance as ( - Select cast(toStartOfInterval(coverage, INTERVAL 1 day) as Date) as flour, count() as regulation - from test - group by flour having flour >= toDate(now())-100 - ), -cheetah as ( - Select flour, regulation from dissonance - union distinct - Select toDate(now())-1, ifnull((select regulation from dissonance where flour = toDate(now())-1),0) as regulation -) -Select flour, regulation from cheetah order by flour with fill step 1 limit 100 format Null; - -drop table test; +ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.reference b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference index 08839f6bb29..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key2.reference +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.reference @@ -1 +0,0 @@ -200 diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key2.sql b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql index d0f02a894f2..53800c95550 100644 --- a/tests/queries/0_stateless/02540_duplicate_primary_key2.sql +++ b/tests/queries/0_stateless/02540_duplicate_primary_key2.sql @@ -88,12 +88,4 @@ CREATE TABLE test ) ENGINE = MergeTree PARTITION BY toYYYYMM(timestamp) -ORDER BY (xxxx17, xxxx14, xxxx16, toStartOfDay(timestamp), left(xxxx19, 10), timestamp); - -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'Airtel' as xxxx14 FROM generateRandom() LIMIT 100; -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'BSNL' as xxxx14 FROM generateRandom() LIMIT 100; -INSERT INTO test SELECT * replace 1 as xxxx16 replace 1 as xxxx1 replace '2022-02-02 01:00:00' as timestamp replace 'xxx' as xxxx14 FROM generateRandom() LIMIT 100; - -select sum(1) from test where toStartOfInterval(timestamp, INTERVAL 1 day) >= TIMESTAMP '2022-02-01 01:00:00' and xxxx14 in ('Airtel', 'BSNL') and xxxx1 = 1 GROUP BY xxxx16; - -drop table test; +ORDER BY (xxxx17, xxxx14, xxxx16, toStartOfDay(timestamp), left(xxxx19, 10), timestamp); -- { serverError BAD_ARGUMENTS} diff --git a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.reference b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.reference new file mode 100644 index 00000000000..904d46b184a --- /dev/null +++ b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.reference @@ -0,0 +1,14 @@ +-- { echoOn } + +SELECT a FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c; +1 +2 +SELECT sin(a) > 2 FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c; +0 +0 +SELECT sin(a) < a FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c AND sin(a) > -a; +1 +1 +SELECT sin(a) < a FROM t_02559 PREWHERE sin(a) < b AND a <= c AND sin(a) > -a; +1 +1 diff --git a/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.sql b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.sql new file mode 100644 index 00000000000..544f5f03cad --- /dev/null +++ b/tests/queries/0_stateless/02559_multiple_read_steps_in_prewhere_reuse_computation.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t_02559; +CREATE TABLE t_02559 (a Int64, b Int64, c Int64) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_02559 SELECT number, number, number FROM numbers(3); + +SET enable_multiple_prewhere_read_steps = 1; + +-- { echoOn } + +SELECT a FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c; +SELECT sin(a) > 2 FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c; +SELECT sin(a) < a FROM t_02559 PREWHERE sin(a) < b AND sin(a) < c AND sin(a) > -a; +SELECT sin(a) < a FROM t_02559 PREWHERE sin(a) < b AND a <= c AND sin(a) > -a; + +-- {echoOff} + +DROP TABLE t_02559; diff --git a/tests/queries/0_stateless/02724_show_indexes.reference b/tests/queries/0_stateless/02724_show_indexes.reference index 8365ade3231..e41f2521f5c 100644 --- a/tests/queries/0_stateless/02724_show_indexes.reference +++ b/tests/queries/0_stateless/02724_show_indexes.reference @@ -1,47 +1,51 @@ --- Aliases of SHOW INDEX -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- EXTENDED -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- WHERE -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e --- Check with weird table names -$4@^7 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -NULL 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c -\' 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c +$4@^7 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +NULL 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES +\' 1 PRIMARY 1 c A 0 \N \N \N PRIMARY YES --- Original table -tbl 0 blf_idx \N \N \N \N \N \N \N bloom_filter \N \N YES d, b -tbl 0 mm1_idx \N \N \N \N \N \N \N minmax \N \N YES a, c, d -tbl 0 mm2_idx \N \N \N \N \N \N \N minmax \N \N YES c, d, e -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES c, a -tbl 0 set_idx \N \N \N \N \N \N \N set \N \N YES e +tbl 1 blf_idx 1 \N 0 \N \N \N BLOOM_FILTER YES d, b +tbl 1 mm1_idx 1 \N 0 \N \N \N MINMAX YES a, c, d +tbl 1 mm2_idx 1 \N 0 \N \N \N MINMAX YES c, d, e +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES +tbl 1 PRIMARY 2 c A 0 \N \N \N PRIMARY YES +tbl 1 set_idx 1 \N 0 \N \N \N SET YES e --- Equally named table in other database -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES --- Short form -tbl 0 mmi_idx \N \N \N \N \N \N \N minmax \N \N YES b -tbl 0 PRIMARY \N \N A \N \N \N \N primary \N \N YES a +tbl 1 mmi_idx 1 \N 0 \N \N \N MINMAX YES b +tbl 1 PRIMARY 1 a A 0 \N \N \N PRIMARY YES diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql index 347c8b2a8d3..1a8402dff4b 100644 --- a/tests/queries/0_stateless/02725_memory-for-merges.sql +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -1,4 +1,4 @@ --- Tags: no-s3-storage +-- Tags: no-s3-storage, no-random-merge-tree-settings -- We allocate a lot of memory for buffers when reading or writing to S3 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; @@ -21,7 +21,6 @@ OPTIMIZE TABLE 02725_memory_for_merges FINAL; SYSTEM FLUSH LOGS; -WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid -SELECT (sum(peak_memory_usage) < 1024 * 1024 * 200 AS x) ? x : sum(peak_memory_usage) from system.part_log where table_uuid=uuid and event_type='MergeParts'; +SELECT (sum(peak_memory_usage) < 1024 * 1024 * 200 AS x) ? x : sum(peak_memory_usage) from system.part_log where database=currentDatabase() and table='02725_memory_for_merges' and event_type='MergeParts'; DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02752_forbidden_headers.reference b/tests/queries/0_stateless/02752_forbidden_headers.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02752_forbidden_headers.sql b/tests/queries/0_stateless/02752_forbidden_headers.sql new file mode 100644 index 00000000000..d966fc0a187 --- /dev/null +++ b/tests/queries/0_stateless/02752_forbidden_headers.sql @@ -0,0 +1,18 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM url('http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; + +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM urlCluster('test_cluster_two_shards_localhost', 'http://localhost:8123/', LineAsString, headers('random_header' = 'value')) FORMAT Null; + +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3('http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } + +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('exact_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('cAsE_INSENSITIVE_header' = 'value')); -- { serverError BAD_ARGUMENTS } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:8123/123/4', LineAsString, headers('random_header' = 'value')); -- { serverError S3_ERROR } diff --git a/tests/queries/0_stateless/02766_prql.reference b/tests/queries/0_stateless/02766_prql.reference new file mode 100644 index 00000000000..90e0b26cee6 --- /dev/null +++ b/tests/queries/0_stateless/02766_prql.reference @@ -0,0 +1,19 @@ +101 Hello, ClickHouse! 2 He +101 Granules are the smallest chunks of data read 2 Gr +102 Insert a lot of rows per batch 2 In +102 Sort your data based on your commonly-used queries 2 So +103 This is an awesome message 2 Th +103 42 +102 4.132209897041321 +--- +101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1 +101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159 +102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421 +102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718 +103 This is an awesome message 2019-04-01 00:00:00.000 42 +--- +101 Hello, ClickHouse! 2019-01-01 00:00:00.000 -1 +101 Granules are the smallest chunks of data read 2019-05-01 00:00:00.000 3.14159 +102 Insert a lot of rows per batch 2019-02-01 00:00:00.000 1.41421 +102 Sort your data based on your commonly-used queries 2019-03-01 00:00:00.000 2.718 +103 This is an awesome message 2019-04-01 00:00:00.000 42 diff --git a/tests/queries/0_stateless/02766_prql.sh b/tests/queries/0_stateless/02766_prql.sh new file mode 100755 index 00000000000..f8bbd72af4e --- /dev/null +++ b/tests/queries/0_stateless/02766_prql.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n -q " +CREATE TEMPORARY TABLE IF NOT EXISTS aboba +( + user_id UInt32, + message String, + creation_date DateTime64, + metric Float32 +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO aboba (user_id, message, creation_date, metric) VALUES (101, 'Hello, ClickHouse!', toDateTime('2019-01-01 00:00:00', 3, 'Europe/Amsterdam'), -1.0), (102, 'Insert a lot of rows per batch', toDateTime('2019-02-01 00:00:00', 3, 'Europe/Amsterdam'), 1.41421 ), (102, 'Sort your data based on your commonly-used queries', toDateTime('2019-03-01 00:00:00', 3, 'Europe/Amsterdam'), 2.718), (101, 'Granules are the smallest chunks of data read', toDateTime('2019-05-01 00:00:00', 3, 'Europe/Amsterdam'), 3.14159), (103, 'This is an awesome message', toDateTime('2019-04-01 00:00:00', 3, 'Europe/Amsterdam'), 42); + +SET dialect = 'prql'; + +from aboba +derive [ + a = 2, + b = s\"LEFT(message, 2)\" +] +select [ user_id, message, a, b ]; + +from aboba +filter user_id > 101 +group user_id ( + aggregate [ + metrics = sum metric + ] +); + +SET dialect = 'clickhouse'; + +SELECT '---'; +SELECT + user_id, + message, + toTimeZone(creation_date, 'Europe/Amsterdam') as creation_date, + metric +FROM aboba; +SELECT '---'; + +SET dialect = 'prql'; + +from aboba +select [ user_id, message, metric ] +derive creation_date = s\"toTimeZone(creation_date, 'Europe/Amsterdam')\" +select [ user_id, message, creation_date, metric]; + +from s\"SELECT * FROM system.users\" | select non_existent_column; # {serverError UNKNOWN_IDENTIFIER} +from non_existent_table; # {serverError UNKNOWN_TABLE} +" \ No newline at end of file diff --git a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference new file mode 100644 index 00000000000..e1b420ecf37 --- /dev/null +++ b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.reference @@ -0,0 +1,4 @@ +This is file data1 data1.csv +This is file data2 data2.csv +This is file data1 data1.csv +This is file data2 data2.csv diff --git a/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql new file mode 100644 index 00000000000..1d125920220 --- /dev/null +++ b/tests/queries/0_stateless/02771_complex_globs_in_storage_file_path.sql @@ -0,0 +1,12 @@ +-- Tags: no-replicated-database, no-parallel + +SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV); -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} + +INSERT INTO TABLE FUNCTION file('02771/dir1/subdir11/data1.csv', 'CSV', 's String') SELECT 'This is file data1' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO TABLE FUNCTION file('02771/dir2/subdir22/data2.csv', 'CSV', 's String') SELECT 'This is file data2' SETTINGS engine_file_truncate_on_insert=1; + +SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV); +SELECT *, _file FROM file('02771/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV); + +SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv'; +SELECT *, _file FROM file('02771/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv'; diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference index 786360783fd..fcede2caf2a 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference @@ -1,40 +1,40 @@ 1 2 3 1 2 3 1 2 3 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - Skip - Name: xy_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 diff --git a/tests/queries/0_stateless/02772_s3_crash.reference b/tests/queries/0_stateless/02772_s3_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02772_s3_crash.sql b/tests/queries/0_stateless/02772_s3_crash.sql new file mode 100644 index 00000000000..5cad83def63 --- /dev/null +++ b/tests/queries/0_stateless/02772_s3_crash.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT * FROM s3(headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * FROM s3Cluster('test_cluster_two_shards_localhost', headers('random_header' = 'value')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh index 015dcb19b82..31a7cc3b410 100755 --- a/tests/queries/0_stateless/02801_backup_native_copy.sh +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -10,7 +10,7 @@ set -e $CLICKHOUSE_CLIENT -nm -q " drop table if exists data; - create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_common_disk'; + create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_disk'; insert into data select * from numbers(10); " diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference b/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql new file mode 100644 index 00000000000..0b7337d1255 --- /dev/null +++ b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql @@ -0,0 +1,3 @@ +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; +select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; diff --git a/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.reference b/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.reference new file mode 100644 index 00000000000..10bcae06939 --- /dev/null +++ b/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.reference @@ -0,0 +1,3 @@ +{"dd":"2023-06-24 00:00:00"} +{"dd":"2023-06-24 00:00:00"} 2023-06-24 00:00:00 +{"result_date":"2023-08-24"} diff --git a/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.sql b/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.sql new file mode 100644 index 00000000000..688dc1017f7 --- /dev/null +++ b/tests/queries/0_stateless/02806_cte_block_cannot_be_empty.sql @@ -0,0 +1,18 @@ +with c as ( select 1 ID, toDate('2023-06-24') dt, 0 p ) select multiIf(t.ID = 1, formatRowNoNewline('JSONEachRow', dd), '') AS params from (select ID, case when p = 0 then toString(date_add(hour, p, dt)) else '2022-01-01' end as dd from c) t; +with c as ( select 1 ID, toDate('2023-06-24') dt, 0 p ) select multiIf(t.ID = 1, formatRowNoNewline('JSONEachRow', dd), '') AS params, dd from (select ID, case when p = 0 then toString(date_add(hour, p, dt)) else '2022-01-01' end as dd from c) t; + +select + if( + outer_table.condition_value = 1, + formatRowNoNewline('JSONEachRow', outer_table.result_date), + '' + ) as json +from ( + select + 1 as condition_value, + date_add(month, inner_table.offset, toDate('2023-06-24')) as result_date + from ( + select + 2 as offset + ) inner_table + ) outer_table; diff --git a/tests/queries/0_stateless/02810_row_binary_with_defaults.reference b/tests/queries/0_stateless/02810_row_binary_with_defaults.reference new file mode 100644 index 00000000000..5a556d1a6a9 --- /dev/null +++ b/tests/queries/0_stateless/02810_row_binary_with_defaults.reference @@ -0,0 +1,6 @@ +42 +1 +42 +1 +\N +[(42,42)] diff --git a/tests/queries/0_stateless/02810_row_binary_with_defaults.sql b/tests/queries/0_stateless/02810_row_binary_with_defaults.sql new file mode 100644 index 00000000000..73662352c0a --- /dev/null +++ b/tests/queries/0_stateless/02810_row_binary_with_defaults.sql @@ -0,0 +1,7 @@ +select * from format('RowBinaryWithDefaults', 'x UInt32 default 42', x'01'); +select * from format('RowBinaryWithDefaults', 'x UInt32 default 42', x'0001000000'); +select * from format('RowBinaryWithDefaults', 'x Nullable(UInt32) default 42', x'01'); +select * from format('RowBinaryWithDefaults', 'x Nullable(UInt32) default 42', x'000001000000'); +select * from format('RowBinaryWithDefaults', 'x Nullable(UInt32) default 42', x'0001'); +select * from format('RowBinaryWithDefaults', 'x Array(Tuple(UInt32, UInt32)) default [(42, 42)]', x'01'); + diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference new file mode 100644 index 00000000000..6abcc56bacc --- /dev/null +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.reference @@ -0,0 +1,5 @@ +0 111 1970-01-01 false +1 abc 2023-03-14 true +2 c 1970-01-01 false +4 888 2023-03-14 false +5 bks 1970-01-01 false diff --git a/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh new file mode 100755 index 00000000000..30223329eca --- /dev/null +++ b/tests/queries/0_stateless/02811_csv_input_field_type_mismatch.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a Int32, b String, c Date, e Boolean) engine=MergeTree order by a" +cat $CURDIR/data_csv/csv_with_bad_field_values.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file diff --git a/tests/queries/0_stateless/02813_func_now_and_alias.reference b/tests/queries/0_stateless/02813_func_now_and_alias.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02813_func_now_and_alias.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02813_func_now_and_alias.sql b/tests/queries/0_stateless/02813_func_now_and_alias.sql new file mode 100644 index 00000000000..bcda1f26181 --- /dev/null +++ b/tests/queries/0_stateless/02813_func_now_and_alias.sql @@ -0,0 +1,3 @@ +SELECT now() = current_timestamp(); +SELECT now() = CURRENT_TIMESTAMP(); +SELECT now() = current_TIMESTAMP(); diff --git a/tests/queries/0_stateless/02813_func_today_and_alias.reference b/tests/queries/0_stateless/02813_func_today_and_alias.reference new file mode 100644 index 00000000000..a9e2f17562a --- /dev/null +++ b/tests/queries/0_stateless/02813_func_today_and_alias.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02813_func_today_and_alias.sql b/tests/queries/0_stateless/02813_func_today_and_alias.sql new file mode 100644 index 00000000000..d379aa74f18 --- /dev/null +++ b/tests/queries/0_stateless/02813_func_today_and_alias.sql @@ -0,0 +1,6 @@ +SELECT today() = current_date(); +SELECT today() = CURRENT_DATE(); +SELECT today() = current_DATE(); +SELECT today() = curdate(); +SELECT today() = CURDATE(); +SELECT today() = curDATE(); diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.reference b/tests/queries/0_stateless/02813_starting_in_text_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02813_starting_in_text_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql new file mode 100644 index 00000000000..e007f58189e --- /dev/null +++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql @@ -0,0 +1,2 @@ +SYSTEM FLUSH LOGS; +SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Starting ClickHouse%'; diff --git a/tests/queries/0_stateless/02814_age_datediff.reference b/tests/queries/0_stateless/02814_age_datediff.reference new file mode 100644 index 00000000000..cbcb8c8a7b6 --- /dev/null +++ b/tests/queries/0_stateless/02814_age_datediff.reference @@ -0,0 +1,130 @@ +-- { echo } + +-- DateTime64 vs DateTime64 with fractional part +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +5100200 +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); +5100200 +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550299', 6, 'UTC')); +5100 +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550298', 6, 'UTC')); +5099 +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999003', 6, 'UTC'), toDateTime64('2023-03-01 19:18:41.999002', 6, 'UTC')); +4 +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999', 3, 'UTC'), toDateTime64('2023-03-01 19:18:41.001', 3, 'UTC')); +4 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.300', 3, 'UTC')); +5 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.100', 3, 'UTC')); +4 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 20:35:36.200100', 6, 'UTC')); +4 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +3 +SELECT age('hour', toDateTime64('2015-01-01 20:31:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:37.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.300', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 23:30:36.200100', 6, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.200', 3, 'UTC')); +3 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 19:30:36.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:28:36.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:35.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.199', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-04 20:30:36.200100', 6, 'UTC')); +2 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.200', 3, 'UTC')); +2 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 19:30:36.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:29:36.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:35.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.100', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-15 20:30:36.200100', 6, 'UTC')); +1 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.200', 3, 'UTC')); +16 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-01 20:30:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 19:30:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:29:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:35.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.100', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-05-02 20:30:36.200100', 6, 'UTC')); +15 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.200', 3, 'UTC')); +5 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-01 20:30:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 19:30:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:29:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:35.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.100', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-04-02 20:30:36.200100', 6, 'UTC')); +4 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.200', 3, 'UTC')); +8 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-01-02 20:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-01 20:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 19:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:29:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:35.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.100', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); +7 +-- DateTime64 vs DateTime64 with negative time +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +2349 +SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +2 +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +5 +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +4 +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02'::Date); +86400000 +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-03'::Date32); +172800000 +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:01:01'::DateTime); +86461000 +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:00:01.299'::DateTime64); +86401299 +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299'::DateTime64, '2021-01-02'::Date); +701 +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299999'::DateTime64(6), '2021-01-02'::Date); +701 +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.2'::DateTime64(1), '2021-01-02'::Date); +800 +SELECT DATEDIFF(microsecond, '2021-01-01 23:59:59.899999'::DateTime64(6), '2021-01-02 00:01:00.100200300'::DateTime64(9)); +60200201 +SELECT DATEDIFF(microsecond, '1969-12-31 23:59:59.999950'::DateTime64(6, 'UTC'), '1970-01-01 00:00:00.000010'::DateTime64(6, 'UTC')); +60 +SELECT DATEDIFF(second, '1969-12-31 23:59:59.123'::DateTime64(6, 'UTC'), '1970-01-01 00:00:09.123'::DateTime64(6, 'UTC')); +10 +SELECT toYYYYMMDDhhmmss(toDateTime64('1969-12-31 23:59:59.900', 3)); +19691231235959 diff --git a/tests/queries/0_stateless/02814_age_datediff.sql b/tests/queries/0_stateless/02814_age_datediff.sql new file mode 100644 index 00000000000..934a95c035f --- /dev/null +++ b/tests/queries/0_stateless/02814_age_datediff.sql @@ -0,0 +1,80 @@ +-- { echo } + +-- DateTime64 vs DateTime64 with fractional part +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); + +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550299', 6, 'UTC')); +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550298', 6, 'UTC')); + +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999003', 6, 'UTC'), toDateTime64('2023-03-01 19:18:41.999002', 6, 'UTC')); +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999', 3, 'UTC'), toDateTime64('2023-03-01 19:18:41.001', 3, 'UTC')); + +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.300', 3, 'UTC')); +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.100', 3, 'UTC')); +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 20:35:36.200100', 6, 'UTC')); + +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:31:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:37.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.300', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 23:30:36.200100', 6, 'UTC')); + +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 19:30:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:28:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:35.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.199', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-04 20:30:36.200100', 6, 'UTC')); + +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 19:30:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:29:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:35.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.100', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-15 20:30:36.200100', 6, 'UTC')); + +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-01 20:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 19:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:29:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:35.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.100', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-05-02 20:30:36.200100', 6, 'UTC')); + +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-01 20:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 19:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:29:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:35.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.100', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-04-02 20:30:36.200100', 6, 'UTC')); + +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-01-02 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-01 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 19:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:29:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:35.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.100', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); + +-- DateTime64 vs DateTime64 with negative time +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); + +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-03'::Date32); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:01:01'::DateTime); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:00:01.299'::DateTime64); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299'::DateTime64, '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299999'::DateTime64(6), '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.2'::DateTime64(1), '2021-01-02'::Date); +SELECT DATEDIFF(microsecond, '2021-01-01 23:59:59.899999'::DateTime64(6), '2021-01-02 00:01:00.100200300'::DateTime64(9)); + +SELECT DATEDIFF(microsecond, '1969-12-31 23:59:59.999950'::DateTime64(6, 'UTC'), '1970-01-01 00:00:00.000010'::DateTime64(6, 'UTC')); +SELECT DATEDIFF(second, '1969-12-31 23:59:59.123'::DateTime64(6, 'UTC'), '1970-01-01 00:00:09.123'::DateTime64(6, 'UTC')); + +SELECT toYYYYMMDDhhmmss(toDateTime64('1969-12-31 23:59:59.900', 3)); diff --git a/tests/queries/0_stateless/02815_first_line.reference b/tests/queries/0_stateless/02815_first_line.reference new file mode 100644 index 00000000000..cdc86229cc8 --- /dev/null +++ b/tests/queries/0_stateless/02815_first_line.reference @@ -0,0 +1,9 @@ +foo +foo +foo +foobarbaz +== vector +1 foo +2 quux +3 single line +4 windows diff --git a/tests/queries/0_stateless/02815_first_line.sql b/tests/queries/0_stateless/02815_first_line.sql new file mode 100644 index 00000000000..8c0affaebd3 --- /dev/null +++ b/tests/queries/0_stateless/02815_first_line.sql @@ -0,0 +1,12 @@ +select firstLine('foo\nbar\nbaz'); +select firstLine('foo\rbar\rbaz'); +select firstLine('foo\r\nbar\r\nbaz'); +select firstLine('foobarbaz'); + +select '== vector'; + +drop table if exists 02815_first_line_vector; +create table 02815_first_line_vector (n Int32, text String) engine = MergeTree order by n; + +insert into 02815_first_line_vector values (1, 'foo\nbar\nbaz'), (2, 'quux\n'), (3, 'single line'), (4, 'windows\r\nline breaks'); +select n, firstLine(text) from 02815_first_line_vector order by n; diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference new file mode 100644 index 00000000000..f2d4d23d9e3 --- /dev/null +++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.reference @@ -0,0 +1,2 @@ +\N 1 19000 +\N 1 19000 diff --git a/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql new file mode 100644 index 00000000000..fa784cf12e3 --- /dev/null +++ b/tests/queries/0_stateless/02815_fix_not_found_constants_col_in_block.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS t0; +CREATE TABLE t0 (vkey UInt32, c0 Float32, primary key(c0)) engine = AggregatingMergeTree; +insert into t0 values (19000, 1); +select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc, c_2_2 asc; +select null as c_2_0, ref_2.c0 as c_2_1, ref_2.vkey as c_2_2 from t0 as ref_2 order by c_2_0 asc, c_2_1 asc; +DROP TABLE t0; diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference new file mode 100644 index 00000000000..051716a791e --- /dev/null +++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.reference @@ -0,0 +1,12 @@ +1 0.1 +1 0.2 +2 0.3 +2 0.4 +3 0.5 +3 0.6 +1 0.1 +1 0.2 +2 0.3 +2 0.4 +3 0.5 +3 0.6 diff --git a/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql new file mode 100644 index 00000000000..e3af53fa335 --- /dev/null +++ b/tests/queries/0_stateless/02815_range_dict_no_direct_join.sql @@ -0,0 +1,34 @@ +CREATE TABLE discounts +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +ENGINE = Memory; + +INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1); +INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2); +INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3); +INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6); + +CREATE DICTIONARY discounts_dict +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +PRIMARY KEY advertiser_id +SOURCE(CLICKHOUSE(TABLE discounts)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max')) +RANGE(MIN discount_start_date MAX discount_end_date); + +CREATE TABLE ids (id UInt64) ENGINE = Memory; +INSERT INTO ids SELECT * FROM numbers(10); + +SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id ORDER BY id, amount SETTINGS join_algorithm = 'direct'; +SELECT id, amount FROM ids INNER JOIN discounts_dict ON id = advertiser_id ORDER BY id, amount SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02816_check_projection_metadata.reference b/tests/queries/0_stateless/02816_check_projection_metadata.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02816_check_projection_metadata.sql b/tests/queries/0_stateless/02816_check_projection_metadata.sql new file mode 100644 index 00000000000..e7da043ad41 --- /dev/null +++ b/tests/queries/0_stateless/02816_check_projection_metadata.sql @@ -0,0 +1,3 @@ +create table kek (uuid FixedString(16), id int, ns String, dt DateTime64(6), projection null_pk (select * order by ns, 1, 4)) engine=MergeTree order by (id, dt, uuid); -- {serverError ILLEGAL_COLUMN } +-- this query could segfault or throw LOGICAL_ERROR previously, when we did not check projection PK +-- insert into kek select * from generageRandom(10000); diff --git a/tests/queries/0_stateless/02816_s2_invalid_point.reference b/tests/queries/0_stateless/02816_s2_invalid_point.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02816_s2_invalid_point.sql b/tests/queries/0_stateless/02816_s2_invalid_point.sql new file mode 100644 index 00000000000..590eb8b5ec2 --- /dev/null +++ b/tests/queries/0_stateless/02816_s2_invalid_point.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest + +SELECT geoToS2(toFloat64(toUInt64(-1)), toFloat64(toUInt64(-1))); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh new file mode 100755 index 00000000000..b1fbea26da7 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# requires TraceCollector, does not available under sanitizers and aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + +# at least something allocated +${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" + +# show wrong allocations +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" diff --git a/tests/queries/0_stateless/02830_insert_values_time_interval.reference b/tests/queries/0_stateless/02830_insert_values_time_interval.reference new file mode 100644 index 00000000000..b5b57fbfbfe --- /dev/null +++ b/tests/queries/0_stateless/02830_insert_values_time_interval.reference @@ -0,0 +1,4 @@ +1 2023-07-21 22:54:02 +2 2023-07-21 21:53:01 +3 2023-07-21 21:53:01 +4 2023-07-20 21:54:02 diff --git a/tests/queries/0_stateless/02830_insert_values_time_interval.sql b/tests/queries/0_stateless/02830_insert_values_time_interval.sql new file mode 100644 index 00000000000..f5d5d8a4c04 --- /dev/null +++ b/tests/queries/0_stateless/02830_insert_values_time_interval.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 +( + c1 DateTime DEFAULT now() NOT NULL, + c2 DateTime DEFAULT now() NOT NULL, + c3 DateTime DEFAULT now() NOT NULL, + PRIMARY KEY(c1, c2, c3) +) ENGINE = MergeTree() +ORDER BY (c1, c2, c3); + +INSERT INTO t1 (c1,c2,c3) VALUES(now() + INTERVAL '1 day 1 hour 1 minute 1 second', now(), now()); + +DROP TABLE t1; + +CREATE TABLE t1 (n int, dt DateTime) ENGINE=Memory; + +SET input_format_values_interpret_expressions=0; +INSERT INTO t1 VALUES (1, toDateTime('2023-07-20 21:53:01') + INTERVAL '1 day 1 hour 1 minute 1 second'), (2, toDateTime('2023-07-20 21:53:01') + INTERVAL '1 day'); +INSERT INTO t1 VALUES (3, toDateTime('2023-07-20 21:53:01') + INTERVAL 1 DAY), (4, toDateTime('2023-07-20 21:53:01') + (toIntervalMinute(1), toIntervalSecond(1))); + +SELECT * FROM t1 ORDER BY n; + +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference b/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql new file mode 100644 index 00000000000..018d1f031e6 --- /dev/null +++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql @@ -0,0 +1 @@ +SELECT match('', repeat('(', 100000)); -- { serverError 306 } diff --git a/tests/queries/0_stateless/02831_trash.reference b/tests/queries/0_stateless/02831_trash.reference new file mode 100644 index 00000000000..e25f2e9e23f --- /dev/null +++ b/tests/queries/0_stateless/02831_trash.reference @@ -0,0 +1,2 @@ +2761631236 +1210084689 diff --git a/tests/queries/0_stateless/02831_trash.sql b/tests/queries/0_stateless/02831_trash.sql new file mode 100644 index 00000000000..600e2ad0695 --- /dev/null +++ b/tests/queries/0_stateless/02831_trash.sql @@ -0,0 +1,2 @@ +SELECT CRC32IEEE(sipHash128()); +SELECT CRC32(murmurHash3_128()); diff --git a/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv new file mode 100644 index 00000000000..e829cc0106a --- /dev/null +++ b/tests/queries/0_stateless/data_csv/csv_with_bad_field_values.csv @@ -0,0 +1,5 @@ +1,abc,2023-03-14,true +2,c,ab,false +bc,111,ab,ban +4,888,2023-03-14,false +5,bks,2023-03,abdd \ No newline at end of file diff --git a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql index 745bb125c2b..cbfbbaa2662 100644 --- a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql +++ b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql @@ -4,6 +4,7 @@ SET max_bytes_to_read = 600000000; SET optimize_move_to_prewhere = 1; +SET enable_multiple_prewhere_read_steps = 1; SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; @@ -11,6 +12,8 @@ SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014 WITH toTimeZone(EventTime, 'Asia/Dubai') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; SET optimize_move_to_prewhere = 0; +SET enable_multiple_prewhere_read_steps = 0; SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits PREWHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6c88d63be49..538a21c88e2 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -762,6 +762,7 @@ Rollup RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes +RowBinaryWithDefaults Runtime SATA SELECTs @@ -1428,6 +1429,7 @@ filesystemFree filesystems finalizeAggregation fips +firstLine firstSignificantSubdomain firstSignificantSubdomainCustom fixedstring @@ -2124,6 +2126,7 @@ rowNumberInBlock rowbinary rowbinarywithnames rowbinarywithnamesandtypes +rowbinarywithdefaults rsync rsyslog runnable @@ -2545,3 +2548,4 @@ znode znodes zookeeperSessionUptime zstd +curdate diff --git a/utils/ci-slack-bot/ci-slack-bot.py b/utils/ci-slack-bot/ci-slack-bot.py index 6e694b4fdbd..ea883e3cda3 100755 --- a/utils/ci-slack-bot/ci-slack-bot.py +++ b/utils/ci-slack-bot/ci-slack-bot.py @@ -26,10 +26,11 @@ else: DRY_RUN_MARK = "" -MAX_FAILURES_DEFAULT = 40 +MAX_FAILURES_DEFAULT = 30 SLACK_URL_DEFAULT = DRY_RUN_MARK -FLAKY_ALERT_PROBABILITY = 0.20 +FLAKY_ALERT_PROBABILITY = 0.50 +REPORT_NO_FAILURES_PROBABILITY = 0.99 MAX_TESTS_TO_REPORT = 4 @@ -89,6 +90,22 @@ WHERE 1 AND check_name ILIKE check_name_pattern """ +# Returns percentage of failed checks (once per day, at noon) +FAILED_CHECKS_PERCENTAGE_QUERY = """ +SELECT if(toHour(now('Europe/Amsterdam')) = 12, v, 0) +FROM +( + SELECT + countDistinctIf((commit_sha, check_name), (test_status LIKE 'F%') AND (check_status != 'success')) + / countDistinct((commit_sha, check_name)) AS v + FROM checks + WHERE 1 + AND (pull_request_number = 0) + AND (test_status != 'SKIPPED') + AND (check_start_time > (now() - toIntervalDay(1))) +) +""" + # It shows all recent failures of the specified test (helps to find when it started) ALL_RECENT_FAILURES_QUERY = """ WITH @@ -202,9 +219,9 @@ def get_too_many_failures_message_impl(failures_count): curr_failures = int(failures_count[0][0]) prev_failures = int(failures_count[0][1]) if curr_failures == 0 and prev_failures != 0: - return ( - "Looks like CI is completely broken: there are *no failures* at all... 0_o" - ) + if random.random() < REPORT_NO_FAILURES_PROBABILITY: + return None + return "Wow, there are *no failures* at all... 0_o" if curr_failures < MAX_FAILURES: return None if prev_failures < MAX_FAILURES: @@ -227,6 +244,19 @@ def get_too_many_failures_message(failures_count): return msg +def get_failed_checks_percentage_message(percentage): + p = float(percentage[0][0]) * 100 + + # Always report more than 1% of failed checks + # For <= 1%: higher percentage of failures == higher probability + if p <= random.random(): + return None + + msg = ":alert: " if p > 1 else "Only " if p < 0.5 else "" + msg += "*{0:.2f}%* of all checks in master have failed yesterday".format(p) + return msg + + def split_slack_message(long_message): lines = long_message.split("\n") messages = [] @@ -280,6 +310,9 @@ def query_and_alert_if_needed(query, get_message_func): def check_and_alert(): query_and_alert_if_needed(NEW_BROKEN_TESTS_QUERY, get_new_broken_tests_message) query_and_alert_if_needed(COUNT_FAILURES_QUERY, get_too_many_failures_message) + query_and_alert_if_needed( + FAILED_CHECKS_PERCENTAGE_QUERY, get_failed_checks_percentage_message + ) def lambda_handler(event, context): diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 51a09b676dc..9f089a26360 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -15,7 +15,7 @@ using namespace DB; void dumpMachine(std::shared_ptr machine) { - auto & storage = machine->getStorage(); + auto & storage = machine->getStorageUnsafe(); std::queue keys; keys.push("/"); @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init();