diff --git a/README.md b/README.md index dcaeda13acd..abaf27abf11 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Upcoming meetups * [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21 * [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26 * [Amsterdam Meetup](https://www.meetup.com/clickhouse-netherlands-user-group/events/303638814) - December 3 +* [Stockholm Meetup](https://www.meetup.com/clickhouse-stockholm-user-group/events/304382411) - December 9 * [New York Meetup](https://www.meetup.com/clickhouse-new-york-user-group/events/304268174) - December 9 * [San Francisco Meetup](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/304286951/) - December 12 diff --git a/docs/changelogs/v24.3.13.40-lts.md b/docs/changelogs/v24.3.13.40-lts.md new file mode 100644 index 00000000000..bce45e88710 --- /dev/null +++ b/docs/changelogs/v24.3.13.40-lts.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.13.40-lts (7acabd77389) FIXME as compared to v24.3.12.75-lts (7cb5dff8019) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#63976](https://github.com/ClickHouse/ClickHouse/issues/63976): Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#71482](https://github.com/ClickHouse/ClickHouse/issues/71482): Fix `Content-Encoding` not sent in some compressed responses. [#64802](https://github.com/ClickHouse/ClickHouse/issues/64802). [#68975](https://github.com/ClickHouse/ClickHouse/pull/68975) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#70451](https://github.com/ClickHouse/ClickHouse/issues/70451): Fix vrash during insertion into FixedString column in PostgreSQL engine. [#69584](https://github.com/ClickHouse/ClickHouse/pull/69584) ([Pavel Kruglov](https://github.com/Avogar)). +* Backported in [#70619](https://github.com/ClickHouse/ClickHouse/issues/70619): Fix server segfault on creating a materialized view with two selects and an `INTERSECT`, e.g. `CREATE MATERIALIZED VIEW v0 AS (SELECT 1) INTERSECT (SELECT 1);`. [#70264](https://github.com/ClickHouse/ClickHouse/pull/70264) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#70877](https://github.com/ClickHouse/ClickHouse/issues/70877): Fix table creation with `CREATE ... AS table_function()` with database `Replicated` and unavailable table function source on secondary replica. [#70511](https://github.com/ClickHouse/ClickHouse/pull/70511) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#70571](https://github.com/ClickHouse/ClickHouse/issues/70571): Ignore all output on async insert with `wait_for_async_insert=1`. Closes [#62644](https://github.com/ClickHouse/ClickHouse/issues/62644). [#70530](https://github.com/ClickHouse/ClickHouse/pull/70530) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#71146](https://github.com/ClickHouse/ClickHouse/issues/71146): Ignore frozen_metadata.txt while traversing shadow directory from system.remote_data_paths. [#70590](https://github.com/ClickHouse/ClickHouse/pull/70590) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#70682](https://github.com/ClickHouse/ClickHouse/issues/70682): Fix creation of stateful window functions on misaligned memory. [#70631](https://github.com/ClickHouse/ClickHouse/pull/70631) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#71113](https://github.com/ClickHouse/ClickHouse/issues/71113): Fix a crash and a leak in AggregateFunctionGroupArraySorted. [#70820](https://github.com/ClickHouse/ClickHouse/pull/70820) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#70990](https://github.com/ClickHouse/ClickHouse/issues/70990): Fix a logical error due to negative zeros in the two-level hash table. This closes [#70973](https://github.com/ClickHouse/ClickHouse/issues/70973). [#70979](https://github.com/ClickHouse/ClickHouse/pull/70979) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#71246](https://github.com/ClickHouse/ClickHouse/issues/71246): Fixed named sessions not being closed and hanging on forever under certain circumstances. [#70998](https://github.com/ClickHouse/ClickHouse/pull/70998) ([Márcio Martins](https://github.com/marcio-absmartly)). +* Backported in [#71371](https://github.com/ClickHouse/ClickHouse/issues/71371): Add try/catch to data parts destructors to avoid terminate. [#71364](https://github.com/ClickHouse/ClickHouse/pull/71364) ([alesapin](https://github.com/alesapin)). +* Backported in [#71594](https://github.com/ClickHouse/ClickHouse/issues/71594): Prevent crash in SortCursor with 0 columns (old analyzer). [#71494](https://github.com/ClickHouse/ClickHouse/pull/71494) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#71022](https://github.com/ClickHouse/ClickHouse/issues/71022): Fix dropping of file cache in CHECK query in case of enabled transactions. [#69256](https://github.com/ClickHouse/ClickHouse/pull/69256) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#70384](https://github.com/ClickHouse/ClickHouse/issues/70384): CI: Enable Integration Tests for backport PRs. [#70329](https://github.com/ClickHouse/ClickHouse/pull/70329) ([Max Kainov](https://github.com/maxknv)). +* Backported in [#70538](https://github.com/ClickHouse/ClickHouse/issues/70538): Remove slow poll() logs in keeper. [#70508](https://github.com/ClickHouse/ClickHouse/pull/70508) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#70971](https://github.com/ClickHouse/ClickHouse/issues/70971): Limiting logging some lines about configs. [#70879](https://github.com/ClickHouse/ClickHouse/pull/70879) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). + diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index dc12a60e8ef..fcdc16637e6 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -54,7 +54,7 @@ Parameters: - `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a line between two points in Euclidean space), or `cosineDistance` (the [cosine distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance)- the angle between two non-zero vectors). -- `quantization`: either `f64`, `f32`, `f16`, `bf16`, or `i8` for storing the vector with reduced precision (optional, default: `bf16`) +- `quantization`: either `f64`, `f32`, `f16`, `bf16`, or `i8` for storing vectors with reduced precision (optional, default: `bf16`) - `hnsw_max_connections_per_layer`: the number of neighbors per HNSW graph node, also known as `M` in the [HNSW paper](https://doi.org/10.1109/TPAMI.2018.2889473) (optional, default: 32) - `hnsw_candidate_list_size_for_construction`: the size of the dynamic candidate list when constructing the HNSW graph, also known as @@ -92,8 +92,8 @@ Vector similarity indexes currently support two distance functions: - `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). -Vector similarity indexes allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. -If no scalar kind was specified during index creation, `f16` is used as default. +Vector similarity indexes allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16`, `bf16`, +and `i8`. If no scalar kind was specified during index creation, `bf16` is used as default. For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no distance function was specified during index creation, `L2Distance` is used as default. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 955cec0234e..f0941aa28aa 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -25,9 +25,10 @@ Query caches can generally be viewed as transactionally consistent or inconsiste slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be served directly from the query cache. In this example, a reasonable validity period could be 30 min. -Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result, -the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. -This reduces maintenance effort and avoids redundancy. +Transactionally inconsistent caching is traditionally provided by client tools or proxy packages (e.g. +[chproxy](https://www.chproxy.org/configuration/caching/)) interacting with the database. As a result, the same caching logic and +configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side. This reduces maintenance +effort and avoids redundancy. ## Configuration Settings and Usage @@ -138,7 +139,10 @@ is only cached if the query runs longer than 5 seconds. It is also possible to s cached - for that use setting [query_cache_min_query_runs](settings/settings.md#query-cache-min-query-runs). Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a different -value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl). +value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl). The query +cache evicts entries "lazily", i.e. when an entry becomes stale, it is not immediately removed from the cache. Instead, when a new entry +is to be inserted into the query cache, the database checks whether the cache has enough free space for the new entry. If this is not the +case, the database tries to remove all stale entries. If the cache still has not enough free space, the new entry is not inserted. Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). @@ -188,14 +192,9 @@ Also, results of queries with non-deterministic functions are not cached by defa To force caching of results of queries with non-deterministic functions regardless, use setting [query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling). -Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force -caching of results of queries with system tables regardless, use setting -[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling). - -:::note -Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether -results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect. -::: +Results of queries that involve system tables (e.g. [system.processes](system-tables/processes.md)` or +[information_schema.tables](system-tables/information_schema.md)) are not cached by default. To force caching of results of queries with +system tables regardless, use setting [query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling). Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 02fa5a8ca58..c5f92ccdf68 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -131,16 +131,6 @@ Type: UInt64 Default: 8 -## background_pool_size - -Sets the number of threads performing background merges and mutations for tables with MergeTree engines. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. - -Before changing it, please also take a look at related MergeTree settings, such as `number_of_free_entries_in_pool_to_lower_max_size_of_merge` and `number_of_free_entries_in_pool_to_execute_mutation`. - -Type: UInt64 - -Default: 16 - ## background_schedule_pool_size The maximum number of threads that will be used for constantly executing some lightweight periodic operations for replicated tables, Kafka streaming, and DNS cache updates. diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index abb32388b7a..4a21900545d 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -670,6 +670,28 @@ SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-publi └─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘ ``` +## ALTER MODIFY COLUMN to JSON type + +It's possible to alter an existing table and change the type of the column to the new `JSON` type. Right now only alter from `String` type is supported. + +**Example** + +```sql +CREATE TABLE test (json String) ENGINE=MergeTree ORDeR BY tuple(); +INSERT INTO test VALUES ('{"a" : 42}'), ('{"a" : 43, "b" : "Hello"}'), ('{"a" : 44, "b" : [1, 2, 3]}')), ('{"c" : "2020-01-01"}'); +ALTER TABLE test MODIFY COLUMN json JSON; +SELECT json, json.a, json.b, json.c FROM test; +``` + +```text +┌─json─────────────────────────┬─json.a─┬─json.b──┬─json.c─────┐ +│ {"a":"42"} │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ {"a":"43","b":"Hello"} │ 43 │ Hello │ ᴺᵁᴸᴸ │ +│ {"a":"44","b":["1","2","3"]} │ 44 │ [1,2,3] │ ᴺᵁᴸᴸ │ +│ {"c":"2020-01-01"} │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 │ +└──────────────────────────────┴────────┴─────────┴────────────┘ +``` + ## Tips for better usage of the JSON type Before creating `JSON` column and loading data into it, consider the following tips: diff --git a/programs/su/su.cpp b/programs/su/su.cpp index 33d929898f4..40242d0687f 100644 --- a/programs/su/su.cpp +++ b/programs/su/su.cpp @@ -59,7 +59,13 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid); if (!result) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid); + { + if (0 != getgrgid_r(gid, &entry, buf.get(), buf_size, &result)) + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid); + + if (!result) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid); + } gid = entry.gr_gid; } @@ -84,7 +90,13 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid); if (!result) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid); + { + if (0 != getpwuid_r(uid, &entry, buf.get(), buf_size, &result)) + throw ErrnoException(ErrorCodes::SYSTEM_ERROR, "Cannot do 'getpwuid_r' to obtain uid from user name ({})", uid); + + if (!result) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid); + } uid = entry.pw_uid; } diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index ec14b096055..a66f9041213 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -196,6 +196,13 @@ public: bool hasDynamicStructure() const override { return getData().hasDynamicStructure(); } void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + bool dynamicStructureEquals(const IColumn & rhs) const override + { + if (const auto * rhs_concrete = typeid_cast(&rhs)) + return data->dynamicStructureEquals(*rhs_concrete->data); + return false; + } + private: WrappedPtr data; WrappedPtr offsets; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 41a9096bc0c..6eb22a8bdf7 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -1208,6 +1208,15 @@ void ColumnDynamic::prepareVariantsForSquashing(const Columns & source_columns) } } +bool ColumnDynamic::dynamicStructureEquals(const IColumn & rhs) const +{ + if (const auto * rhs_concrete = typeid_cast(&rhs)) + return max_dynamic_types == rhs_concrete->max_dynamic_types && global_max_dynamic_types == rhs_concrete->global_max_dynamic_types + && variant_info.variant_name == rhs_concrete->variant_info.variant_name + && variant_column->dynamicStructureEquals(*rhs_concrete->variant_column); + return false; +} + void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { if (!empty()) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 57a1545a832..fbab4d5da4c 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -376,6 +376,7 @@ public: bool addNewVariant(const DataTypePtr & new_variant) { return addNewVariant(new_variant, new_variant->getName()); } bool hasDynamicStructure() const override { return true; } + bool dynamicStructureEquals(const IColumn & rhs) const override; void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; const StatisticsPtr & getStatistics() const { return statistics; } diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 7ebbed930d8..a5511dfeeb4 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -345,6 +345,13 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const return false; } +bool ColumnMap::dynamicStructureEquals(const IColumn & rhs) const +{ + if (const auto * rhs_map = typeid_cast(&rhs)) + return nested->dynamicStructureEquals(*rhs_map->nested); + return false; +} + ColumnPtr ColumnMap::compress() const { auto compressed = nested->compress(); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 575114f8d3a..8dfa5bb5845 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -123,6 +123,7 @@ public: ColumnPtr compress() const override; bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); } + bool dynamicStructureEquals(const IColumn & rhs) const override; void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 18ba8ed36ee..f4121435be9 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1415,6 +1415,31 @@ void ColumnObject::prepareForSquashing(const std::vector & source_col } } +bool ColumnObject::dynamicStructureEquals(const IColumn & rhs) const +{ + const auto * rhs_object = typeid_cast(&rhs); + if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() + || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types + || dynamic_paths.size() != rhs_object->dynamic_paths.size()) + return false; + + for (const auto & [path, column] : typed_paths) + { + auto it = rhs_object->typed_paths.find(path); + if (it == rhs_object->typed_paths.end() || !it->second->dynamicStructureEquals(*column)) + return false; + } + + for (const auto & [path, column] : dynamic_paths) + { + auto it = rhs_object->dynamic_paths.find(path); + if (it == rhs_object->dynamic_paths.end() || !it->second->dynamicStructureEquals(*column)) + return false; + } + + return true; +} + void ColumnObject::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) { if (!empty()) diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 74ae7e136ce..7b8a381d571 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -177,6 +177,7 @@ public: bool isFinalized() const override; bool hasDynamicStructure() const override { return true; } + bool dynamicStructureEquals(const IColumn & rhs) const override; void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; const PathToColumnMap & getTypedPaths() const { return typed_paths; } @@ -227,6 +228,7 @@ public: void setDynamicPaths(const std::vector & paths); void setDynamicPaths(const std::vector> & paths); void setMaxDynamicPaths(size_t max_dynamic_paths_); + void setGlobalMaxDynamicPaths(size_t global_max_dynamic_paths_); void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; } void serializePathAndValueIntoSharedData(ColumnString * shared_data_paths, ColumnString * shared_data_values, std::string_view path, const IColumn & column, size_t n); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index c3f7d10f650..28e5f03cc3c 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -757,6 +757,26 @@ bool ColumnTuple::hasDynamicStructure() const return false; } +bool ColumnTuple::dynamicStructureEquals(const IColumn & rhs) const +{ + if (const auto * rhs_tuple = typeid_cast(&rhs)) + { + const size_t tuple_size = columns.size(); + if (tuple_size != rhs_tuple->columns.size()) + return false; + + for (size_t i = 0; i < tuple_size; ++i) + if (!columns[i]->dynamicStructureEquals(*rhs_tuple->columns[i])) + return false; + + return true; + } + else + { + return false; + } +} + void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { std::vector nested_source_columns; diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index c73f90f13d9..d5eee911edc 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -141,6 +141,7 @@ public: ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; } bool hasDynamicStructure() const override; + bool dynamicStructureEquals(const IColumn & rhs) const override; void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; /// Empty tuple needs a public method to manage its size. diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index d5c8386d35f..2fa59b8e33c 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1409,6 +1409,23 @@ bool ColumnVariant::structureEquals(const IColumn & rhs) const return true; } +bool ColumnVariant::dynamicStructureEquals(const IColumn & rhs) const +{ + const auto * rhs_variant = typeid_cast(&rhs); + if (!rhs_variant) + return false; + + const size_t num_variants = variants.size(); + if (num_variants != rhs_variant->variants.size()) + return false; + + for (size_t i = 0; i < num_variants; ++i) + if (!variants[i]->dynamicStructureEquals(rhs_variant->getVariantByGlobalDiscriminator(globalDiscriminatorByLocal(i)))) + return false; + + return true; +} + ColumnPtr ColumnVariant::compress() const { ColumnPtr local_discriminators_compressed = local_discriminators->compress(); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index f90a812703d..a68a961169c 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -336,6 +336,7 @@ public: void extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators); bool hasDynamicStructure() const override; + bool dynamicStructureEquals(const IColumn & rhs) const override; void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; private: diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 95becba3fdb..c77b089812e 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -635,6 +635,9 @@ public: /// Checks if column has dynamic subcolumns. virtual bool hasDynamicStructure() const { return false; } + + /// For columns with dynamic subcolumns checks if columns have equal dynamic structure. + [[nodiscard]] virtual bool dynamicStructureEquals(const IColumn & rhs) const { return structureEquals(rhs); } /// For columns with dynamic subcolumns this method takes dynamic structure from source columns /// and creates proper resulting dynamic structure in advance for merge of these source columns. virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 9648fdd4530..c56e698766a 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -330,7 +330,7 @@ TYPED_TEST(CoordinationTest, TestSummingRaft1) this->setLogDirectory("./logs"); this->setStateFileDirectory("."); - SummingRaftServer s1(1, "localhost", 44444, this->keeper_context); + SummingRaftServer s1(1, "localhost", 0, this->keeper_context); SCOPE_EXIT(if (std::filesystem::exists("./state")) std::filesystem::remove("./state");); /// Single node is leader diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index f3ada33cb37..01339226c2d 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -5865,7 +5865,7 @@ Experimental data deduplication for SELECT queries based on part UUIDs // Please add settings related to formats in Core/FormatFactorySettings.h, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. #define OBSOLETE_SETTINGS(M, ALIAS) \ - /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + /** Obsolete settings which are kept around for compatibility reasons. They have no effect anymore. */ \ MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 0) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index edf4e60706b..0ff9d0a6833 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -64,6 +64,7 @@ static std::initializer_list(version)) +SerializationDynamic::DynamicSerializationVersion::DynamicSerializationVersion(UInt64 version) : value(static_cast(version)) { checkVersion(version); } -void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) +void SerializationDynamic::DynamicSerializationVersion::checkVersion(UInt64 version) { - if (version != VariantTypeName) - throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); + if (version != V1 && version != V2) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization: {}", version); } void SerializationDynamic::serializeBinaryBulkStatePrefix( @@ -108,22 +108,17 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix"); /// Write structure serialization version. - UInt64 structure_version = DynamicStructureSerializationVersion::Value::VariantTypeName; + UInt64 structure_version = DynamicSerializationVersion::Value::V2; writeBinaryLittleEndian(structure_version, *stream); auto dynamic_state = std::make_shared(structure_version); - dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes(); - /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types - /// that is specified in the Dynamic type (we could decrease it before merge). - writeVarUInt(dynamic_state->max_dynamic_types, *stream); - dynamic_state->variant_type = variant_info.variant_type; dynamic_state->variant_names = variant_info.variant_names; const auto & variant_column = column_dynamic.getVariantColumn(); - /// Write information about variants. - size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it. - writeVarUInt(num_variants, *stream); + /// Write information about dynamic types. + dynamic_state->num_dynamic_types = dynamic_state->variant_names.size() - 1; /// -1 for SharedVariant + writeVarUInt(dynamic_state->num_dynamic_types, *stream); if (settings.data_types_binary_encoding) { const auto & variants = assert_cast(*dynamic_state->variant_type).getVariants(); @@ -251,22 +246,25 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD UInt64 structure_version; readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); - /// Read max_dynamic_types parameter. - readVarUInt(structure_state->max_dynamic_types, *structure_stream); + if (structure_state->structure_version.value == DynamicSerializationVersion::Value::V1) + { + /// Skip max_dynamic_types parameter in V1 serialization version. + size_t max_dynamic_types; + readVarUInt(max_dynamic_types, *structure_stream); + } /// Read information about variants. DataTypes variants; - size_t num_variants; - readVarUInt(num_variants, *structure_stream); - variants.reserve(num_variants + 1); /// +1 for shared variant. + readVarUInt(structure_state->num_dynamic_types, *structure_stream); + variants.reserve(structure_state->num_dynamic_types + 1); /// +1 for shared variant. if (settings.data_types_binary_encoding) { - for (size_t i = 0; i != num_variants; ++i) + for (size_t i = 0; i != structure_state->num_dynamic_types; ++i) variants.push_back(decodeDataType(*structure_stream)); } else { String data_type_name; - for (size_t i = 0; i != num_variants; ++i) + for (size_t i = 0; i != structure_state->num_dynamic_types; ++i) { readStringBinary(data_type_name, *structure_stream); variants.push_back(DataTypeFactory::instance().get(data_type_name)); @@ -364,9 +362,6 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreamsAndCountTotalSi if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); - if (column_dynamic.getMaxDynamicTypes() != dynamic_state->max_dynamic_types) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_types parameter of Dynamic. Expected: {}, Got: {}", dynamic_state->max_dynamic_types, column_dynamic.getMaxDynamicTypes()); - settings.path.push_back(Substream::DynamicData); assert_cast(*dynamic_state->variant_serialization) .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( @@ -424,7 +419,7 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( if (mutable_column->empty()) { - column_dynamic.setMaxDynamicPaths(structure_state->max_dynamic_types); + column_dynamic.setMaxDynamicPaths(structure_state->num_dynamic_types); column_dynamic.setVariantType(structure_state->variant_type); column_dynamic.setStatistics(structure_state->statistics); } diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h index f34b5d0e770..ac98bbbc8b5 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -16,18 +16,28 @@ public: { } - struct DynamicStructureSerializationVersion + struct DynamicSerializationVersion { enum Value { - VariantTypeName = 1, + /// V1 serialization: + /// - DynamicStructure stream: + /// + /// + /// + /// (only in MergeTree serialization) + /// (only in MergeTree serialization) + /// - DynamicData stream: contains the data of nested Variant column. + V1 = 1, + /// V2 serialization: the same as V1 but without max_dynamic_types parameter in DynamicStructure stream. + V2 = 2, }; Value value; static void checkVersion(UInt64 version); - explicit DynamicStructureSerializationVersion(UInt64 version); + explicit DynamicSerializationVersion(UInt64 version); }; void enumerateStreams( @@ -113,9 +123,9 @@ private: struct DeserializeBinaryBulkStateDynamicStructure : public ISerialization::DeserializeBinaryBulkState { - DynamicStructureSerializationVersion structure_version; + DynamicSerializationVersion structure_version; DataTypePtr variant_type; - size_t max_dynamic_types; + size_t num_dynamic_types; ColumnDynamic::StatisticsPtr statistics; explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 3e1badb25ca..1b95fddee9f 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -63,14 +63,13 @@ SerializationObject::ObjectSerializationVersion::ObjectSerializationVersion(UInt void SerializationObject::ObjectSerializationVersion::checkVersion(UInt64 version) { - if (version != V1 && version != STRING) + if (version != V1 && version != V2 && version != STRING) throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Object structure serialization."); } struct SerializeBinaryBulkStateObject: public ISerialization::SerializeBinaryBulkState { SerializationObject::ObjectSerializationVersion serialization_version; - size_t max_dynamic_paths; std::vector sorted_dynamic_paths; std::unordered_map typed_path_states; std::unordered_map dynamic_path_states; @@ -188,7 +187,7 @@ void SerializationObject::serializeBinaryBulkStatePrefix( throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Object column structure during serialization of binary bulk state prefix"); /// Write serialization version. - UInt64 serialization_version = settings.write_json_as_string ? ObjectSerializationVersion::Value::STRING : ObjectSerializationVersion::Value::V1; + UInt64 serialization_version = settings.write_json_as_string ? ObjectSerializationVersion::Value::STRING : ObjectSerializationVersion::Value::V2; writeBinaryLittleEndian(serialization_version, *stream); auto object_state = std::make_shared(serialization_version); @@ -198,9 +197,6 @@ void SerializationObject::serializeBinaryBulkStatePrefix( return; } - object_state->max_dynamic_paths = column_object.getMaxDynamicPaths(); - /// Write max_dynamic_paths parameter. - writeVarUInt(object_state->max_dynamic_paths, *stream); /// Write all dynamic paths in sorted order. object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); for (const auto & [path, _] : dynamic_paths) @@ -360,10 +356,15 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationObject::deserializeOb UInt64 serialization_version; readBinaryLittleEndian(serialization_version, *structure_stream); auto structure_state = std::make_shared(serialization_version); - if (structure_state->serialization_version.value == ObjectSerializationVersion::Value::V1) + if (structure_state->serialization_version.value == ObjectSerializationVersion::Value::V1 || structure_state->serialization_version.value == ObjectSerializationVersion::Value::V2) { - /// Read max_dynamic_paths parameter. - readVarUInt(structure_state->max_dynamic_paths, *structure_stream); + if (structure_state->serialization_version.value == ObjectSerializationVersion::Value::V1) + { + /// Skip max_dynamic_paths parameter in V1 serialization version. + size_t max_dynamic_paths; + readVarUInt(max_dynamic_paths, *structure_stream); + } + /// Read the sorted list of dynamic paths. size_t dynamic_paths_size; readVarUInt(dynamic_paths_size, *structure_stream); @@ -446,9 +447,6 @@ void SerializationObject::serializeBinaryBulkWithMultipleStreams( const auto & dynamic_paths = column_object.getDynamicPaths(); const auto & shared_data = column_object.getSharedDataPtr(); - if (column_object.getMaxDynamicPaths() != object_state->max_dynamic_paths) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_paths parameter of Object. Expected: {}, Got: {}", object_state->max_dynamic_paths, column_object.getMaxDynamicPaths()); - if (column_object.getDynamicPaths().size() != object_state->sorted_dynamic_paths.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of number of dynamic paths in Object. Expected: {}, Got: {}", object_state->sorted_dynamic_paths.size(), column_object.getDynamicPaths().size()); @@ -597,7 +595,7 @@ void SerializationObject::deserializeBinaryBulkWithMultipleStreams( /// If it's a new object column, set dynamic paths and statistics. if (column_object.empty()) { - column_object.setMaxDynamicPaths(structure_state->max_dynamic_paths); + column_object.setMaxDynamicPaths(structure_state->sorted_dynamic_paths.size()); column_object.setDynamicPaths(structure_state->sorted_dynamic_paths); column_object.setStatistics(structure_state->statistics); } diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 8bc72312da1..db772756a20 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -31,6 +31,8 @@ public: /// - ObjectDynamicPath stream for each column in dynamic paths /// - ObjectSharedData stream shared data column. V1 = 0, + /// V2 serialization: the same as V1 but without max_dynamic_paths parameter in ObjectStructure stream. + V2 = 2, /// String serialization: /// - ObjectData stream with single String column containing serialized JSON. STRING = 1, @@ -98,7 +100,6 @@ private: struct DeserializeBinaryBulkStateObjectStructure : public ISerialization::DeserializeBinaryBulkState { ObjectSerializationVersion serialization_version; - size_t max_dynamic_paths; std::vector sorted_dynamic_paths; std::unordered_set dynamic_paths; /// Paths statistics. Map (dynamic path) -> (number of non-null values in this path). diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 0eb0d82ef0f..7fd0f7cf631 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -25,8 +25,10 @@ struct BitShiftLeftImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); - else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + else if (static_cast(b) > 8 * sizeof(A)) + return static_cast(0); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -43,9 +45,10 @@ struct BitShiftLeftImpl const UInt8 word_size = 8 * sizeof(*pos); size_t n = end - pos; const UInt128 bit_limit = static_cast(word_size) * n; - if (b < 0 || static_cast(b) > bit_limit) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - if (b == bit_limit) + if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + + if (b == bit_limit || static_cast(b) > bit_limit) { // insert default value out_vec.push_back(0); @@ -111,9 +114,10 @@ struct BitShiftLeftImpl const UInt8 word_size = 8; size_t n = end - pos; const UInt128 bit_limit = static_cast(word_size) * n; - if (b < 0 || static_cast(b) > bit_limit) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - if (b == bit_limit) + if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + + if (b == bit_limit || static_cast(b) > bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 16032b32f68..19ea7b8c751 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -26,8 +26,10 @@ struct BitShiftRightImpl { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); - else if (b < 0 || static_cast(b) > 8 * sizeof(A)) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); + else if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + else if (static_cast(b) > 8 * sizeof(A)) + return static_cast(0); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -59,9 +61,10 @@ struct BitShiftRightImpl const UInt8 word_size = 8; size_t n = end - pos; const UInt128 bit_limit = static_cast(word_size) * n; - if (b < 0 || static_cast(b) > bit_limit) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - if (b == bit_limit) + if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + + if (b == bit_limit || static_cast(b) > bit_limit) { /// insert default value out_vec.push_back(0); @@ -99,9 +102,10 @@ struct BitShiftRightImpl const UInt8 word_size = 8; size_t n = end - pos; const UInt128 bit_limit = static_cast(word_size) * n; - if (b < 0 || static_cast(b) > bit_limit) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value and less or equal to the bit width of the value to shift"); - if (b == bit_limit) + if (b < 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "The number of shift positions needs to be a non-negative value"); + + if (b == bit_limit || static_cast(b) > bit_limit) { // insert default value out_vec.resize_fill(out_vec.size() + n); diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index f7b7ffc5aea..7de3f7af78d 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -37,6 +37,11 @@ namespace ProfileEvents extern const Event FilesystemCacheFailToReserveSpaceBecauseOfCacheResize; } +namespace CurrentMetrics +{ + extern const Metric FilesystemCacheDownloadQueueElements; +} + namespace DB { @@ -918,7 +923,13 @@ bool FileCache::tryReserve( if (!query_priority->collectCandidatesForEviction( size, required_elements_num, reserve_stat, eviction_candidates, {}, user.user_id, cache_lock)) { - failure_reason = "cannot evict enough space for query limit"; + const auto & stat = reserve_stat.total_stat; + failure_reason = fmt::format( + "cannot evict enough space for query limit " + "(non-releasable count: {}, non-releasable size: {}, " + "releasable count: {}, releasable size: {}, background download elements: {})", + stat.non_releasable_count, stat.non_releasable_size, stat.releasable_count, stat.releasable_size, + CurrentMetrics::get(CurrentMetrics::FilesystemCacheDownloadQueueElements)); return false; } @@ -933,7 +944,13 @@ bool FileCache::tryReserve( if (!main_priority->collectCandidatesForEviction( size, required_elements_num, reserve_stat, eviction_candidates, queue_iterator, user.user_id, cache_lock)) { - failure_reason = "cannot evict enough space"; + const auto & stat = reserve_stat.total_stat; + failure_reason = fmt::format( + "cannot evict enough space " + "(non-releasable count: {}, non-releasable size: {}, " + "releasable count: {}, releasable size: {}, background download elements: {})", + stat.non_releasable_count, stat.non_releasable_size, stat.releasable_count, stat.releasable_size, + CurrentMetrics::get(CurrentMetrics::FilesystemCacheDownloadQueueElements)); return false; } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..4bdcb2fe855 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -5,6 +5,8 @@ namespace DB { +class Context; + /// Sink which is returned from Storage::write. class SinkToStorage : public ExceptionKeepingTransform { @@ -16,12 +18,14 @@ public: const Block & getHeader() const { return inputs.front().getHeader(); } void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } + void addInterpreterContext(std::shared_ptr context) { interpreter_context.emplace_back(std::move(context)); } protected: virtual void consume(Chunk & chunk) = 0; private: std::vector table_locks; + std::vector> interpreter_context; void onConsume(Chunk chunk) override; GenerateResult onGenerate() override; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index c14775057a5..fbca222b1e7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1457,14 +1457,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const ErrorCodes::BAD_ARGUMENTS, "The change of data type {} of column {} to {} is not allowed. It has known bugs", old_data_type->getName(), backQuote(column_name), command.data_type->getName()); - - bool has_object_type = isObject(command.data_type); - command.data_type->forEachChild([&](const IDataType & type){ has_object_type |= isObject(type); }); - if (has_object_type) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "The change of data type {} of column {} to {} is not supported.", - old_data_type->getName(), backQuote(column_name), command.data_type->getName()); } if (command.isRemovingProperty()) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 41783ffddb0..7453d609fa9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -735,7 +735,9 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadUUID(); loadColumns(require_columns_checksums); loadChecksums(require_columns_checksums); + loadIndexGranularity(); + index_granularity.shrinkToFitInMemory(); if (!(*storage.getSettings())[MergeTreeSetting::primary_key_lazy_load]) getIndex(); diff --git a/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp index c393349ef32..4f786215cbe 100644 --- a/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp @@ -116,7 +116,7 @@ bool allow( double sum_size, double max_size, double min_age, - double range_size, + size_t range_size, double partition_size, double min_size_to_lower_base_log, double max_size_to_lower_base_log, @@ -125,6 +125,9 @@ bool allow( if (settings.min_age_to_force_merge && min_age >= settings.min_age_to_force_merge) return true; + if (settings.min_parts_to_merge_at_once && range_size < settings.min_parts_to_merge_at_once) + return false; + /// Map size to 0..1 using logarithmic scale /// Use log(1 + x) instead of log1p(x) because our sum_size is always integer. /// Also log1p seems to be slow and significantly affect performance of merges assignment. diff --git a/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h index 2d4129b8bf8..1e7676c6aed 100644 --- a/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h @@ -90,6 +90,8 @@ public: { /// Zero means unlimited. Can be overridden by the same merge tree setting. size_t max_parts_to_merge_at_once = 100; + /// Zero means no minimum. Can be overridden by the same merge tree setting. + size_t min_parts_to_merge_at_once = 0; /// Some sort of a maximum number of parts in partition. Can be overridden by the same merge tree setting. size_t parts_to_throw_insert = 3000; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 176b5c00b0a..488f4b2390d 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -82,6 +82,7 @@ namespace MergeTreeSetting extern const MergeTreeSettingsMergeSelectorAlgorithm merge_selector_algorithm; extern const MergeTreeSettingsBool merge_selector_enable_heuristic_to_remove_small_parts_at_right; extern const MergeTreeSettingsFloat merge_selector_base; + extern const MergeTreeSettingsUInt64 min_parts_to_merge_at_once; } namespace ErrorCodes @@ -267,7 +268,8 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart if (status == SelectPartsDecision::SELECTED) res.insert(all_partition_ids[i]); else - LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason.text); + LOG_TEST(log, "Nothing to merge in partition {} with max_total_size_to_merge = {} (looked up {} ranges): {}", + all_partition_ids[i], ReadableSize(max_total_size_to_merge), ranges_per_partition[i].size(), out_disable_reason.text); } String best_partition_id_to_optimize = getBestPartitionToOptimizeEntire(info.partitions_info); @@ -565,6 +567,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( simple_merge_settings.max_parts_to_merge_at_once = (*data_settings)[MergeTreeSetting::max_parts_to_merge_at_once]; simple_merge_settings.enable_heuristic_to_remove_small_parts_at_right = (*data_settings)[MergeTreeSetting::merge_selector_enable_heuristic_to_remove_small_parts_at_right]; simple_merge_settings.base = (*data_settings)[MergeTreeSetting::merge_selector_base]; + simple_merge_settings.min_parts_to_merge_at_once = (*data_settings)[MergeTreeSetting::min_parts_to_merge_at_once]; if (!(*data_settings)[MergeTreeSetting::min_age_to_force_merge_on_partition_only]) simple_merge_settings.min_age_to_force_merge = (*data_settings)[MergeTreeSetting::min_age_to_force_merge_seconds]; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 67a2c1ee9f1..c8d11ced683 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -63,23 +63,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( for (const auto & column : columns_list) { auto compression = getCodecDescOrDefault(column.name, default_codec); - addStreams(column, nullptr, compression); - } -} - -void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & block) -{ - if (is_dynamic_streams_initialized) - return; - - is_dynamic_streams_initialized = true; - for (const auto & column : columns_list) - { - if (column.type->hasDynamicSubcolumns()) - { - auto compression = getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block.getByName(column.name).column, compression); - } + MergeTreeDataPartWriterCompact::addStreams(column, nullptr, compression); } } @@ -181,20 +165,25 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { - /// On first block of data initialize streams for dynamic subcolumns. - initDynamicStreamsIfNeeded(block); + Block result_block = block; + + /// During serialization columns with dynamic subcolumns (like JSON/Dynamic) must have the same dynamic structure. + /// But it may happen that they don't (for example during ALTER MODIFY COLUMN from some type to JSON/Dynamic). + /// In this case we use dynamic structure of the column from the first written block and adjust columns from + /// the next blocks so they match this dynamic structure. + initOrAdjustDynamicStructureIfNeeded(result_block); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical merge) if (compute_granularity) { - size_t index_granularity_for_block = computeIndexGranularity(block); + size_t index_granularity_for_block = computeIndexGranularity(result_block); assert(index_granularity_for_block >= 1); - fillIndexGranularity(index_granularity_for_block, block.rows()); + fillIndexGranularity(index_granularity_for_block, result_block.rows()); } - Block result_block = permuteBlockIfNeeded(block, permutation); + result_block = permuteBlockIfNeeded(result_block, permutation); if (!header) header = result_block.cloneEmpty(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 20c47fb8314..b3e2e78491d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -50,9 +50,7 @@ private: void addToChecksums(MergeTreeDataPartChecksums & checksums); - void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); - - void initDynamicStreamsIfNeeded(const Block & block); + void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc) override; Block header; @@ -106,8 +104,6 @@ private: /// then finally to 'marks_file'. std::unique_ptr marks_compressor; std::unique_ptr marks_source_hashing; - - bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 388737915ab..c483d47fed7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -564,6 +564,45 @@ Names MergeTreeDataPartWriterOnDisk::getSkipIndicesColumns() const return Names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); } +void MergeTreeDataPartWriterOnDisk::initOrAdjustDynamicStructureIfNeeded(Block & block) +{ + if (!is_dynamic_streams_initialized) + { + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + /// Create all streams for dynamic subcolumns using dynamic structure from block. + auto compression = getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block.getByName(column.name).column, compression); + } + } + is_dynamic_streams_initialized = true; + block_sample = block.cloneEmpty(); + } + else + { + size_t size = block.columns(); + for (size_t i = 0; i != size; ++i) + { + auto & column = block.getByPosition(i); + const auto & sample_column = block_sample.getByPosition(i); + /// Check if the dynamic structure of this column is different from the sample column. + if (column.type->hasDynamicSubcolumns() && !column.column->dynamicStructureEquals(*sample_column.column)) + { + /// We need to change the dynamic structure of the column so it matches the sample column. + /// To do it, we create empty column of this type, take dynamic structure from sample column + /// and insert data into it. Resulting column will have required dynamic structure and the content + /// of the column in current block. + auto new_column = sample_column.type->createColumn(); + new_column->takeDynamicStructureFromSourceColumns({sample_column.column}); + new_column->insertRangeFrom(*column.column, 0, column.column->size()); + column.column = std::move(new_column); + } + } + } +} + template struct MergeTreeDataPartWriterOnDisk::Stream; template struct MergeTreeDataPartWriterOnDisk::Stream; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 4a760c20b58..49d654c15e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -154,6 +154,14 @@ protected: /// Get unique non ordered skip indices column. Names getSkipIndicesColumns() const; + virtual void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc) = 0; + + /// On first block create all required streams for columns with dynamic subcolumns and remember the block sample. + /// On each next block check if dynamic structure of the columns equals to the dynamic structure of the same + /// columns in the sample block. If for some column dynamic structure is different, adjust it so it matches + /// the structure from the sample. + void initOrAdjustDynamicStructureIfNeeded(Block & block); + const MergeTreeIndices skip_indices; const ColumnsStatistics stats; @@ -188,6 +196,10 @@ protected: size_t current_mark = 0; GinIndexStoreFactory::GinIndexStores gin_index_stores; + + bool is_dynamic_streams_initialized = false; + Block block_sample; + private: void initSkipIndices(); void initPrimaryIndex(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 433c7c21613..7c9724b1b75 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -116,24 +116,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( for (const auto & column : columns_list) { auto compression = getCodecDescOrDefault(column.name, default_codec); - addStreams(column, nullptr, compression); - } -} - -void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const DB::Block & block) -{ - if (is_dynamic_streams_initialized) - return; - - is_dynamic_streams_initialized = true; - block_sample = block.cloneEmpty(); - for (const auto & column : columns_list) - { - if (column.type->hasDynamicSubcolumns()) - { - auto compression = getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block_sample.getByName(column.name).column, compression); - } + MergeTreeDataPartWriterWide::addStreams(column, nullptr, compression); } } @@ -277,15 +260,20 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { - /// On first block of data initialize streams for dynamic subcolumns. - initDynamicStreamsIfNeeded(block); + Block block_to_write = block; + + /// During serialization columns with dynamic subcolumns (like JSON/Dynamic) must have the same dynamic structure. + /// But it may happen that they don't (for example during ALTER MODIFY COLUMN from some type to JSON/Dynamic). + /// In this case we use dynamic structure of the column from the first written block and adjust columns from + /// the next blocks so they match this dynamic structure. + initOrAdjustDynamicStructureIfNeeded(block_to_write); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical part of vertical merge) if (compute_granularity) { - size_t index_granularity_for_block = computeIndexGranularity(block); + size_t index_granularity_for_block = computeIndexGranularity(block_to_write); if (rows_written_in_last_mark > 0) { size_t rows_left_in_last_mark = index_granularity.getMarkRows(getCurrentMark()) - rows_written_in_last_mark; @@ -303,11 +291,9 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm } } - fillIndexGranularity(index_granularity_for_block, block.rows()); + fillIndexGranularity(index_granularity_for_block, block_to_write.rows()); } - Block block_to_write = block; - auto granules_to_write = getGranulesToWrite(index_granularity, block_to_write.rows(), getCurrentMark(), rows_written_in_last_mark); auto offset_columns = written_offset_columns ? *written_offset_columns : WrittenOffsetColumns{}; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index b594b2d79bb..19304b28c6c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -93,9 +93,7 @@ private: void addStreams( const NameAndTypePair & name_and_type, const ColumnPtr & column, - const ASTPtr & effective_codec_desc); - - void initDynamicStreamsIfNeeded(const Block & block); + const ASTPtr & effective_codec_desc) override; /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical @@ -144,10 +142,6 @@ private: /// How many rows we have already written in the current mark. /// More than zero when incoming blocks are smaller then their granularity. size_t rows_written_in_last_mark = 0; - - Block block_sample; - - bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index d69a00643f0..c3e740bde84 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -122,4 +122,10 @@ std::string MergeTreeIndexGranularity::describe() const { return fmt::format("initialized: {}, marks_rows_partial_sums: [{}]", initialized, fmt::join(marks_rows_partial_sums, ", ")); } + +void MergeTreeIndexGranularity::shrinkToFitInMemory() +{ + marks_rows_partial_sums.shrink_to_fit(); +} + } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.h b/src/Storages/MergeTree/MergeTreeIndexGranularity.h index f66e721ec1e..9b8375dd2d8 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.h @@ -100,6 +100,8 @@ public: void resizeWithFixedGranularity(size_t size, size_t fixed_granularity); std::string describe() const; + + void shrinkToFitInMemory(); }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp index f95b840e223..0b17fa05072 100644 --- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp @@ -178,23 +178,20 @@ String USearchIndexWithSerialization::Statistics::toString() const } MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, - const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, UsearchHnswParams usearch_hnsw_params_) - : MergeTreeIndexGranuleVectorSimilarity(index_name_, index_sample_block_, metric_kind_, scalar_kind_, usearch_hnsw_params_, nullptr) + : MergeTreeIndexGranuleVectorSimilarity(index_name_, metric_kind_, scalar_kind_, usearch_hnsw_params_, nullptr) { } MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, - const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, UsearchHnswParams usearch_hnsw_params_, USearchIndexWithSerializationPtr index_) : index_name(index_name_) - , index_sample_block(index_sample_block_) , metric_kind(metric_kind_) , scalar_kind(scalar_kind_) , usearch_hnsw_params(usearch_hnsw_params_) @@ -261,7 +258,7 @@ MergeTreeIndexAggregatorVectorSimilarity::MergeTreeIndexAggregatorVectorSimilari MergeTreeIndexGranulePtr MergeTreeIndexAggregatorVectorSimilarity::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, usearch_hnsw_params, index); + auto granule = std::make_shared(index_name, metric_kind, scalar_kind, usearch_hnsw_params, index); index = nullptr; return granule; } @@ -490,7 +487,7 @@ MergeTreeIndexVectorSimilarity::MergeTreeIndexVectorSimilarity( MergeTreeIndexGranulePtr MergeTreeIndexVectorSimilarity::createIndexGranule() const { - return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind, usearch_hnsw_params); + return std::make_shared(index.name, metric_kind, scalar_kind, usearch_hnsw_params); } MergeTreeIndexAggregatorPtr MergeTreeIndexVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const @@ -531,15 +528,17 @@ void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* atta { const bool has_two_args = (index.arguments.size() == 2); const bool has_five_args = (index.arguments.size() == 5); + const bool has_six_args = (index.arguments.size() == 6); /// Legacy index creation syntax before #70616. Supported only to be able to load old tables, can be removed mid-2025. + /// The 6th argument (ef_search) is ignored. /// Check number and type of arguments - if (!has_two_args && !has_five_args) + if (!has_two_args && !has_five_args && !has_six_args) throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index must have two or five arguments"); if (index.arguments[0].getType() != Field::Types::String) throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of vector similarity index (method) must be of type String"); if (index.arguments[1].getType() != Field::Types::String) throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of vector similarity index (metric) must be of type String"); - if (has_five_args) + if (has_five_args || has_six_args) { if (index.arguments[2].getType() != Field::Types::String) throw Exception(ErrorCodes::INCORRECT_QUERY, "Third argument of vector similarity index (quantization) must be of type String"); diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h index 9a81e168393..fe5049daf77 100644 --- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h @@ -69,14 +69,12 @@ struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranu { MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, - const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, UsearchHnswParams usearch_hnsw_params_); MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, - const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, UsearchHnswParams usearch_hnsw_params_, @@ -90,7 +88,6 @@ struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranu bool empty() const override { return !index || index->size() == 0; } const String index_name; - const Block index_sample_block; const unum::usearch::metric_kind_t metric_kind; const unum::usearch::scalar_kind_t scalar_kind; const UsearchHnswParams usearch_hnsw_params; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 33910d1048d..fcd4e05cf00 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -102,6 +102,7 @@ namespace ErrorCodes DECLARE(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", EXPERIMENTAL) \ DECLARE(Bool, merge_selector_enable_heuristic_to_remove_small_parts_at_right, true, "Enable heuristic for selecting parts for merge which removes parts from right side of range, if their size is less than specified ratio (0.01) of sum_size. Works for Simple and StochasticSimple merge selectors", 0) \ DECLARE(Float, merge_selector_base, 5.0, "Affects write amplification of assigned merges (expert level setting, don't change if you don't understand what it is doing). Works for Simple and StochasticSimple merge selectors", 0) \ + DECLARE(UInt64, min_parts_to_merge_at_once, 0, "Minimal amount of data parts which merge selector can pick to merge at once (expert level setting, don't change if you don't understand what it is doing). 0 - disabled. Works for Simple and StochasticSimple merge selectors.", 0) \ \ /** Inserts settings. */ \ DECLARE(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 77c34aae30a..39096718b5c 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -207,6 +207,8 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); + /// Just in case + new_part->index_granularity.shrinkToFitInMemory(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); /// In mutation, existing_rows_count is already calculated in PartMergerWriter diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 936df7b0275..7f6588fc632 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -984,6 +984,8 @@ void finalizeMutatedPart( new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; + /// Just in case + new_data_part->index_granularity.shrinkToFitInMemory(); new_data_part->setIndex(*source_part->getIndex()); new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index d047b28e076..9491c40b65f 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -228,10 +228,20 @@ StorageMaterializedView::StorageMaterializedView( if (!fixed_uuid) { - if (to_inner_uuid != UUIDHelpers::Nil) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "TO INNER UUID is not allowed for materialized views with REFRESH without APPEND"); - if (to_table_id.hasUUID()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "explicit UUID is not allowed for target table of materialized view with REFRESH without APPEND"); + if (mode >= LoadingStrictnessLevel::ATTACH) + { + /// Old versions of ClickHouse (when refreshable MV was experimental) could add useless + /// UUIDs to attach queries. + to_table_id.uuid = UUIDHelpers::Nil; + to_inner_uuid = UUIDHelpers::Nil; + } + else + { + if (to_inner_uuid != UUIDHelpers::Nil) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "TO INNER UUID is not allowed for materialized views with REFRESH without APPEND"); + if (to_table_id.hasUUID()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "explicit UUID is not allowed for target table of materialized view with REFRESH without APPEND"); + } } if (!has_inner_table) @@ -382,6 +392,7 @@ void StorageMaterializedView::read( } query_plan.addStorageHolder(storage); + query_plan.addInterpreterContext(context); query_plan.addTableLock(std::move(lock)); } } @@ -405,6 +416,7 @@ SinkToStoragePtr StorageMaterializedView::write(const ASTPtr & query, const Stor auto sink = storage->write(query, metadata_snapshot, context, async_insert); + sink->addInterpreterContext(context); sink->addTableLock(lock); return sink; } diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 6f2e3e70736..c271339db8b 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -795,11 +795,12 @@ class CiCache: # start waiting for the next TIMEOUT seconds if there are more than X(=4) jobs to wait # wait TIMEOUT seconds in rounds. Y(=5) is the max number of rounds expired_sec = 0 - start_at = int(time.time()) + start_at = time.time() while expired_sec < TIMEOUT and self.jobs_to_wait: await_finished: Set[str] = set() if not dry_run: - time.sleep(poll_interval_sec) + # Do not sleep longer than required + time.sleep(min(poll_interval_sec, TIMEOUT - expired_sec)) self.update() for job_name, job_config in self.jobs_to_wait.items(): num_batches = job_config.num_batches @@ -844,10 +845,12 @@ class CiCache: del self.jobs_to_wait[job] if not dry_run: - expired_sec = int(time.time()) - start_at - print( - f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]" - ) + expired_sec = int(time.time() - start_at) + msg = f"...awaiting continues... seconds left [{TIMEOUT - expired_sec}]" + if expired_sec >= TIMEOUT: + # Avoid `seconds left [-3]` + msg = f"awaiting for round {round_cnt} is finished" + print(msg) else: # make up for 2 iterations in dry_run expired_sec += int(TIMEOUT / 2) + 1 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 74bad7528c8..fee7ce841a6 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -27,6 +27,7 @@ 0.3 0.15 0.15 + 50 0 diff --git a/tests/integration/compose/docker_compose_mysql.yml b/tests/integration/compose/docker_compose_mysql.yml index f45410bde78..91df21165ea 100644 --- a/tests/integration/compose/docker_compose_mysql.yml +++ b/tests/integration/compose/docker_compose_mysql.yml @@ -5,7 +5,7 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse MYSQL_ROOT_HOST: ${MYSQL_ROOT_HOST} - DATADIR: /mysql/ + DATADIR: /var/log/mysql/ expose: - ${MYSQL_PORT:-3306} command: --server_id=100 @@ -14,11 +14,11 @@ services: --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 - --log-error=/mysql/error.log + --log-error=/var/log/mysql/error.log --general-log=ON - --general-log-file=/mysql/general.log + --general-log-file=/var/log/mysql/general.log volumes: - type: ${MYSQL_LOGS_FS:-tmpfs} source: ${MYSQL_LOGS:-} - target: /mysql/ + target: /var/log/mysql/ user: ${MYSQL_DOCKER_USER} diff --git a/tests/integration/compose/docker_compose_mysql_8_0.yml b/tests/integration/compose/docker_compose_mysql_8_0.yml index e1ff1633bc7..e1e2e241443 100644 --- a/tests/integration/compose/docker_compose_mysql_8_0.yml +++ b/tests/integration/compose/docker_compose_mysql_8_0.yml @@ -4,8 +4,8 @@ services: restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse - MYSQL_ROOT_HOST: ${MYSQL_ROOT_HOST} - DATADIR: /mysql/ + MYSQL_ROOT_HOST: ${MYSQL8_ROOT_HOST} + DATADIR: /var/log/mysql/ expose: - ${MYSQL8_PORT:-3306} command: --server_id=100 --log-bin='mysql-bin-1.log' @@ -13,11 +13,11 @@ services: --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 - --log-error=/mysql/error.log + --log-error=/var/log/mysql/error.log --general-log=ON - --general-log-file=/mysql/general.log + --general-log-file=/var/log/mysql/general.log volumes: - type: ${MYSQL8_LOGS_FS:-tmpfs} source: ${MYSQL8_LOGS:-} - target: /mysql/ + target: /var/log/mysql/ user: ${MYSQL8_DOCKER_USER} diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index c495fc1d44f..284b304c632 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1403,8 +1403,8 @@ def test_shards_distributed(started_cluster, mode, processing_threads): # A unique path is necessary for repeatable tests keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" - files_to_generate = 300 - row_num = 300 + files_to_generate = 600 + row_num = 1000 total_rows = row_num * files_to_generate shards_num = 2 diff --git a/tests/queries/0_stateless/02354_vector_search_bug_52282.reference b/tests/queries/0_stateless/02354_vector_search_adaptive_index_granularity.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_52282.reference rename to tests/queries/0_stateless/02354_vector_search_adaptive_index_granularity.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_adaptive_index_granularity.sql b/tests/queries/0_stateless/02354_vector_search_adaptive_index_granularity.sql similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_adaptive_index_granularity.sql rename to tests/queries/0_stateless/02354_vector_search_adaptive_index_granularity.sql diff --git a/tests/queries/0_stateless/02354_vector_search_bug_71381.reference b/tests/queries/0_stateless/02354_vector_search_and_other_skipping_indexes.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_71381.reference rename to tests/queries/0_stateless/02354_vector_search_and_other_skipping_indexes.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_71381.sql b/tests/queries/0_stateless/02354_vector_search_and_other_skipping_indexes.sql similarity index 79% rename from tests/queries/0_stateless/02354_vector_search_bug_71381.sql rename to tests/queries/0_stateless/02354_vector_search_and_other_skipping_indexes.sql index 9e3246700b8..386d3b6e26e 100644 --- a/tests/queries/0_stateless/02354_vector_search_bug_71381.sql +++ b/tests/queries/0_stateless/02354_vector_search_and_other_skipping_indexes.sql @@ -2,7 +2,7 @@ SET allow_experimental_vector_similarity_index = 1; --- Issue #71381: Usage of vector similarity index and further skipping indexes on the same table +-- Usage of vector similarity index and further skipping indexes on the same table (issue #71381) DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02354_vector_search_bug_adaptive_index_granularity.reference b/tests/queries/0_stateless/02354_vector_search_different_array_sizes.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_adaptive_index_granularity.reference rename to tests/queries/0_stateless/02354_vector_search_different_array_sizes.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_different_array_sizes.sql b/tests/queries/0_stateless/02354_vector_search_different_array_sizes.sql similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_different_array_sizes.sql rename to tests/queries/0_stateless/02354_vector_search_different_array_sizes.sql diff --git a/tests/queries/0_stateless/02354_vector_search_bug_different_array_sizes.reference b/tests/queries/0_stateless/02354_vector_search_empty_arrays_or_default_values.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_different_array_sizes.reference rename to tests/queries/0_stateless/02354_vector_search_empty_arrays_or_default_values.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_52282.sql b/tests/queries/0_stateless/02354_vector_search_empty_arrays_or_default_values.sql similarity index 80% rename from tests/queries/0_stateless/02354_vector_search_bug_52282.sql rename to tests/queries/0_stateless/02354_vector_search_empty_arrays_or_default_values.sql index b8066ce278a..e24b1a527be 100644 --- a/tests/queries/0_stateless/02354_vector_search_bug_52282.sql +++ b/tests/queries/0_stateless/02354_vector_search_empty_arrays_or_default_values.sql @@ -2,7 +2,7 @@ SET allow_experimental_vector_similarity_index = 1; --- Issue #52258: Vector similarity indexes must reject empty Arrays or Arrays with default values +-- Vector similarity indexes must reject empty Arrays or Arrays with default values (issue #52258) DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02354_vector_search_bug_multiple_indexes.reference b/tests/queries/0_stateless/02354_vector_search_legacy_index_creation_syntax.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_multiple_indexes.reference rename to tests/queries/0_stateless/02354_vector_search_legacy_index_creation_syntax.reference diff --git a/tests/queries/0_stateless/02354_vector_search_legacy_index_creation_syntax.sql b/tests/queries/0_stateless/02354_vector_search_legacy_index_creation_syntax.sql new file mode 100644 index 00000000000..e5dbc6aa6a9 --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_legacy_index_creation_syntax.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest, no-ordinary-database + +-- Tests the legacy syntax to create vector similarity indexes before #70616. +-- Support for this syntax can be removed after mid-2025. + +SET allow_experimental_vector_similarity_index = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f32', 42, 99, 113)) ENGINE = MergeTree ORDER BY id; -- Note the 6th parameter: 133 + +DROP TABLE tab; + diff --git a/tests/queries/0_stateless/03225_alter_to_json_not_supported.reference b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference similarity index 100% rename from tests/queries/0_stateless/03225_alter_to_json_not_supported.reference rename to tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_multiple_indexes.sql b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_multiple_indexes.sql rename to tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql diff --git a/tests/queries/0_stateless/02354_vector_search_bug_multiple_marks.reference b/tests/queries/0_stateless/02354_vector_search_multiple_marks.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_multiple_marks.reference rename to tests/queries/0_stateless/02354_vector_search_multiple_marks.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_multiple_marks.sql b/tests/queries/0_stateless/02354_vector_search_multiple_marks.sql similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_multiple_marks.sql rename to tests/queries/0_stateless/02354_vector_search_multiple_marks.sql diff --git a/tests/queries/0_stateless/02354_vector_search_bug_69085.reference b/tests/queries/0_stateless/02354_vector_search_subquery.reference similarity index 100% rename from tests/queries/0_stateless/02354_vector_search_bug_69085.reference rename to tests/queries/0_stateless/02354_vector_search_subquery.reference diff --git a/tests/queries/0_stateless/02354_vector_search_bug_69085.sql b/tests/queries/0_stateless/02354_vector_search_subquery.sql similarity index 93% rename from tests/queries/0_stateless/02354_vector_search_bug_69085.sql rename to tests/queries/0_stateless/02354_vector_search_subquery.sql index 4dbcdf66e36..65ad0dbcd97 100644 --- a/tests/queries/0_stateless/02354_vector_search_bug_69085.sql +++ b/tests/queries/0_stateless/02354_vector_search_subquery.sql @@ -3,7 +3,7 @@ SET allow_experimental_vector_similarity_index = 1; SET enable_analyzer = 0; --- Issue #69085: Reference vector for vector search is computed by a subquery +-- Reference vector for vector search is computed by a subquery (issue #69085) DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql index 1dbb3ef8158..cb53c4db7de 100644 --- a/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql +++ b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql @@ -7,7 +7,7 @@ SYSTEM DROP QUERY CACHE; -- Run query whose result gets cached in the query cache. -- Besides "use_query_cache", pass two more knobs (one QC-specific knob and one non-QC-specific knob). We just care -- *that* they are passed and not about their effect. -SELECT 1 SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true, max_threads = 16; +SELECT 1 SETTINGS use_query_cache = true, query_cache_nondeterministic_function_handling = 'save', max_threads = 16; -- Check that entry in QC exists SELECT COUNT(*) FROM system.query_cache; diff --git a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql index 91e8624057c..6b2961f0555 100644 --- a/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql +++ b/tests/queries/0_stateless/02766_bitshift_with_const_arguments.sql @@ -10,7 +10,7 @@ DROP TABLE IF EXISTS t1; CREATE TABLE t0 (vkey UInt32, pkey UInt32, c0 UInt32) engine = TinyLog; CREATE TABLE t1 (vkey UInt32) ENGINE = AggregatingMergeTree ORDER BY vkey; INSERT INTO t0 VALUES (15, 25000, 58); -SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT ref_5.pkey AS c_2_c2392_6 FROM t0 AS ref_5 WHERE 'J[' < multiIf(ref_5.pkey IN ( SELECT 1 ), bitShiftLeft(multiIf(ref_5.c0 > NULL, '1', ')'), 40), NULL); DROP TABLE t0; DROP TABLE t1; diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference index 33b8cd6ee26..1fda82a9747 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.reference @@ -1,3 +1,9 @@ -- bitShiftRight +0 + +\0\0\0\0\0\0\0\0 -- bitShiftLeft +0 + +\0\0\0\0\0\0\0\0 OK diff --git a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql index aec01753673..340cc1292e4 100644 --- a/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql +++ b/tests/queries/0_stateless/03198_bit_shift_throws_error_for_out_of_bounds.sql @@ -1,17 +1,17 @@ SELECT '-- bitShiftRight'; SELECT bitShiftRight(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toUInt8(1), 8 + 1); SELECT bitShiftRight('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight('hola', 4 * 8 + 1); SELECT bitShiftRight(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftRight(toFixedString('hola', 8), 8 * 8 + 1); SELECT '-- bitShiftLeft'; SELECT bitShiftLeft(1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toUInt8(1), 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toUInt8(1), 8 + 1); SELECT bitShiftLeft('hola', -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft('hola', 4 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft('hola', 4 * 8 + 1); SELECT bitShiftLeft(toFixedString('hola', 8), -1); -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT bitShiftLeft(toFixedString('hola', 8), 8 * 8 + 1); SELECT 'OK'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql b/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql deleted file mode 100644 index 398494d56de..00000000000 --- a/tests/queries/0_stateless/03225_alter_to_json_not_supported.sql +++ /dev/null @@ -1,15 +0,0 @@ -set allow_experimental_json_type = 1; - -drop table if exists test; -create table test (s String) engine=MergeTree order by tuple(); -alter table test modify column s JSON; -- { serverError BAD_ARGUMENTS } -drop table test; - -create table test (s Array(String)) engine=MergeTree order by tuple(); -alter table test modify column s Array(JSON); -- { serverError BAD_ARGUMENTS } -drop table test; - -create table test (s Tuple(String, String)) engine=MergeTree order by tuple(); -alter table test modify column s Tuple(JSON, String); -- { serverError BAD_ARGUMENTS } -drop table test; - diff --git a/tests/queries/0_stateless/03246_alter_from_string_to_json.reference b/tests/queries/0_stateless/03246_alter_from_string_to_json.reference new file mode 100644 index 00000000000..8253c4fef48 --- /dev/null +++ b/tests/queries/0_stateless/03246_alter_from_string_to_json.reference @@ -0,0 +1,134 @@ +All paths: +['key0','key1','key2','key3','key4','key5'] +Shared data paths: +key2 +key3 +key4 +key5 +{"key0":"value0"} +{"key1":"value1"} +{"key0":"value2"} +{"key1":"value3"} +{"key0":"value4"} +{"key1":"value5"} +{"key0":"value6"} +{"key1":"value7"} +{"key0":"value8"} +{"key1":"value9"} +{"key2":"value60000"} +{"key3":"value60001"} +{"key2":"value60002"} +{"key3":"value60003"} +{"key2":"value60004"} +{"key3":"value60005"} +{"key2":"value60006"} +{"key3":"value60007"} +{"key2":"value60008"} +{"key3":"value60009"} +{"key4":"value120000"} +{"key5":"value120001"} +{"key4":"value120002"} +{"key5":"value120003"} +{"key4":"value120004"} +{"key5":"value120005"} +{"key4":"value120006"} +{"key5":"value120007"} +{"key4":"value120008"} +{"key5":"value120009"} +value0 \N \N \N \N \N +\N value1 \N \N \N \N +value2 \N \N \N \N \N +\N value3 \N \N \N \N +value4 \N \N \N \N \N +\N value5 \N \N \N \N +value6 \N \N \N \N \N +\N value7 \N \N \N \N +value8 \N \N \N \N \N +\N value9 \N \N \N \N +\N \N value60000 \N \N \N +\N \N \N value60001 \N \N +\N \N value60002 \N \N \N +\N \N \N value60003 \N \N +\N \N value60004 \N \N \N +\N \N \N value60005 \N \N +\N \N value60006 \N \N \N +\N \N \N value60007 \N \N +\N \N value60008 \N \N \N +\N \N \N value60009 \N \N +\N \N \N \N value120000 \N +\N \N \N \N \N value120001 +\N \N \N \N value120002 \N +\N \N \N \N \N value120003 +\N \N \N \N value120004 \N +\N \N \N \N \N value120005 +\N \N \N \N value120006 \N +\N \N \N \N \N value120007 +\N \N \N \N value120008 \N +\N \N \N \N \N value120009 +All paths: +['key0','key1','key2','key3','key4','key5'] +Shared data paths: +key2 +key3 +key4 +key5 +{"key0":"value0"} +{"key1":"value1"} +{"key0":"value2"} +{"key1":"value3"} +{"key0":"value4"} +{"key1":"value5"} +{"key0":"value6"} +{"key1":"value7"} +{"key0":"value8"} +{"key1":"value9"} +{"key2":"value60000"} +{"key3":"value60001"} +{"key2":"value60002"} +{"key3":"value60003"} +{"key2":"value60004"} +{"key3":"value60005"} +{"key2":"value60006"} +{"key3":"value60007"} +{"key2":"value60008"} +{"key3":"value60009"} +{"key4":"value120000"} +{"key5":"value120001"} +{"key4":"value120002"} +{"key5":"value120003"} +{"key4":"value120004"} +{"key5":"value120005"} +{"key4":"value120006"} +{"key5":"value120007"} +{"key4":"value120008"} +{"key5":"value120009"} +value0 \N \N \N \N \N +\N value1 \N \N \N \N +value2 \N \N \N \N \N +\N value3 \N \N \N \N +value4 \N \N \N \N \N +\N value5 \N \N \N \N +value6 \N \N \N \N \N +\N value7 \N \N \N \N +value8 \N \N \N \N \N +\N value9 \N \N \N \N +\N \N value60000 \N \N \N +\N \N \N value60001 \N \N +\N \N value60002 \N \N \N +\N \N \N value60003 \N \N +\N \N value60004 \N \N \N +\N \N \N value60005 \N \N +\N \N value60006 \N \N \N +\N \N \N value60007 \N \N +\N \N value60008 \N \N \N +\N \N \N value60009 \N \N +\N \N \N \N value120000 \N +\N \N \N \N \N value120001 +\N \N \N \N value120002 \N +\N \N \N \N \N value120003 +\N \N \N \N value120004 \N +\N \N \N \N \N value120005 +\N \N \N \N value120006 \N +\N \N \N \N \N value120007 +\N \N \N \N value120008 \N +\N \N \N \N \N value120009 diff --git a/tests/queries/0_stateless/03246_alter_from_string_to_json.sql.j2 b/tests/queries/0_stateless/03246_alter_from_string_to_json.sql.j2 new file mode 100644 index 00000000000..2ccf2153699 --- /dev/null +++ b/tests/queries/0_stateless/03246_alter_from_string_to_json.sql.j2 @@ -0,0 +1,36 @@ +-- Random settings limits: index_granularity=(None, 60000) +-- Tags: long + +set allow_experimental_json_type = 1; +set max_block_size = 20000; + +drop table if exists test; + +{% for create_command in ['create table test (x UInt64, json String) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;', + 'create table test (x UInt64, json String) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;'] -%} + +{{ create_command }} + +insert into test select number, toJSONString(map('key' || multiIf(number < 60000, number % 2, number < 120000, number % 2 + 2, number % 2 + 4), 'value' || number)) from numbers(200000); + +alter table test modify column json JSON settings mutations_sync=1; + +select 'All paths:'; +select distinctJSONPaths(json) from test; +select 'Shared data paths:'; +select distinct (arrayJoin(JSONSharedDataPaths(json))) as path from test order by path; +select json from test order by x limit 10; +select json from test order by x limit 10 offset 60000; +select json from test order by x limit 10 offset 120000; +select json.key0, json.key1, json.key2, json.key3, json.key4, json.key5 from test order by x limit 10; +select json.key0, json.key1, json.key2, json.key3, json.key4, json.key5 from test order by x limit 10 offset 60000; +select json.key0, json.key1, json.key2, json.key3, json.key4, json.key5 from test order by x limit 10 offset 120000; + +select json from test format Null; +select json from test order by x format Null; +select json.key0, json.key1, json.key2, json.key3, json.key4, json.key5 from test format Null; +select json.key0, json.key1, json.key2, json.key3, json.key4, json.key5 from test order by x format Null; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.reference b/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.reference new file mode 100644 index 00000000000..ca2fb7e8ff9 --- /dev/null +++ b/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.reference @@ -0,0 +1,12 @@ +5000 +leonardomso/33-js-concepts 3 +ytdl-org/youtube-dl 3 +Bogdanp/neko 2 +bminossi/AllVideoPocsFromHackerOne 2 +disclose/diodata 2 +Commit 182 +chipeo345 119 +phanwi346 114 +Nicholas Piggin 95 +direwolf-github 49 +2 diff --git a/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.sh b/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.sh new file mode 100755 index 00000000000..e8368b6702a --- /dev/null +++ b/tests/queries/0_stateless/03247_ghdata_string_to_json_alter.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-s3-storage, long +# ^ no-s3-storage: too memory hungry + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" + +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \ + --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsString" + +${CLICKHOUSE_CLIENT} -q "ALTER TABLE ghdata MODIFY column data JSON SETTINGS mutations_sync=1" --allow_experimental_json_type 1 + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" + +${CLICKHOUSE_CLIENT} -q \ +"SELECT data.repo.name, count() AS stars FROM ghdata \ + WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5" --allow_suspicious_types_in_group_by=1 --allow_suspicious_types_in_order_by=1 + +${CLICKHOUSE_CLIENT} --enable_analyzer=1 -q \ +"SELECT data.payload.commits[].author.name AS name, count() AS c FROM ghdata \ + ARRAY JOIN data.payload.commits[].author.name \ + GROUP BY name ORDER BY c DESC, name LIMIT 5" --allow_suspicious_types_in_group_by=1 --allow_suspicious_types_in_order_by=1 + +${CLICKHOUSE_CLIENT} -q "SELECT max(data.payload.pull_request.assignees[].size0) FROM ghdata" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" diff --git a/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.reference b/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.sql b/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.sql new file mode 100644 index 00000000000..bb44e4920af --- /dev/null +++ b/tests/queries/0_stateless/03267_materialized_view_keeps_security_context.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.rview; +DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}.wview; + +-- Read from view +CREATE MATERIALIZED VIEW rview ENGINE = File(CSV) POPULATE AS SELECT 1 AS c0; +SELECT 1 FROM rview; + +-- Write through view populate +CREATE MATERIALIZED VIEW wview ENGINE = Join(ALL, INNER, c0) POPULATE AS SELECT 1 AS c0; diff --git a/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.reference b/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.reference new file mode 100644 index 00000000000..966a0980e59 --- /dev/null +++ b/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.reference @@ -0,0 +1,4 @@ +2 +3 +4 +1 diff --git a/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.sh b/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.sh new file mode 100755 index 00000000000..90b9d0339cf --- /dev/null +++ b/tests/queries/0_stateless/03267_min_parts_to_merge_at_once.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t;" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE t (key UInt64) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_parts_to_merge_at_once=5, merge_selector_base=1" + +$CLICKHOUSE_CLIENT --query "INSERT INTO t VALUES (1)" +$CLICKHOUSE_CLIENT --query "INSERT INTO t VALUES (2);" + +# doesn't make test flaky +sleep 1 + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.parts WHERE active and database = currentDatabase() and table = 't'" + +$CLICKHOUSE_CLIENT --query "INSERT INTO t VALUES (3)" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.parts WHERE active and database = currentDatabase() and table = 't'" + +$CLICKHOUSE_CLIENT --query "INSERT INTO t VALUES (4)" + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.parts WHERE active and database = currentDatabase() and table = 't'" + +$CLICKHOUSE_CLIENT --query "INSERT INTO t VALUES (5)" + +counter=0 retries=60 + +while [[ $counter -lt $retries ]]; do + result=$($CLICKHOUSE_CLIENT --query "SELECT count() FROM system.parts WHERE active and database = currentDatabase() and table = 't'") + if [ "$result" -eq "1" ];then + break; + fi + sleep 0.5 + counter=$((counter + 1)) +done + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM system.parts WHERE active and database = currentDatabase() and table = 't'" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t" diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index cf28db5d49a..fab562a8cbb 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -31,6 +31,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.13.40-lts 2024-11-07 v24.3.12.75-lts 2024-10-08 v24.3.11.7-lts 2024-09-06 v24.3.10.33-lts 2024-09-03